Set up working R environment and import 18S ASV table. Modify input tables and import as phyloseq objects in order to perform quality control removal of contaminant ASVs (decontam)
library(tidyverse);library(reshape2);library(cowplot);library(patchwork);library(phyloseq);library(decontam)
load("data-input/GR-ASVtables-updatedTax.RData", verbose = TRUE)## Loading objects:
## GR_tagseq_longformat
## GR_tagseq_wideformat
Import ASV table as phyloseq object, note control samples.
taxmat <- GR_tagseq_wideformat %>%
select(Feature.ID, Taxon_updated) %>%
separate(Taxon_updated, c("Kingdom","Supergroup","Division","Class","Order","Family","Genus","Species"), sep = ";", remove = FALSE) %>%
column_to_rownames(var = "Feature.ID") %>%
as.matrix## Warning: Expected 8 pieces. Missing pieces filled with `NA` in 4828 rows [3, 5,
## 9, 11, 14, 20, 23, 28, 33, 34, 35, 37, 39, 40, 44, 46, 47, 53, 55, 57, ...].
asvmat <- GR_tagseq_wideformat %>%
select(Feature.ID, starts_with(c("Gorda", "Axial"))) %>%
column_to_rownames(var = "Feature.ID") %>%
as.matrixrow.names(taxmat)<-row.names(asvmat)
# class(asvmat);class(taxmat)
ASV = otu_table(asvmat, taxa_are_rows = TRUE) #phyloseq command
TAX = tax_table(taxmat)
physeq <- phyloseq(ASV, TAX)
physeq #Phyloseq object## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 9175 taxa and 34 samples ]
## tax_table() Taxonomy Table: [ 9175 taxa by 9 taxonomic ranks ]
# Include additional sample names
samplenames <- as.data.frame(colnames(asvmat))
# samplenames; head(asvtab)
colnames(samplenames)[1]<-"SAMPLE"
# Import metadata
ventnames <- read.delim("data-input/ventnames-gordaridge.txt")
# names(ventnames);head(ventnames)
# View(ventnames)
colnames(ventnames)[1]<-"SAMPLE"
#
samplenames_1 <- left_join(samplenames, ventnames)## Joining, by = "SAMPLE"
row.names(samplenames_1)<-sample_names(physeq)
samplenames_1 <- samplenames_1 %>% unite(LocationName_Sampletype, LocationName, Sampletype, sep = " ", remove = FALSE)
# Convert to phyloseq object
sampledata <- sample_data(samplenames_1)
# Merge with other data
physeq_names = merge_phyloseq(physeq, sampledata)
# physeq_names
# sample_data(physeq_names)## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 9175 taxa and 34 samples ]
## sample_data() Sample Data: [ 34 samples by 9 sample variables ]
## tax_table() Taxonomy Table: [ 9175 taxa by 9 taxonomic ranks ]
# Check out library size of my data
df <- as.data.frame(sample_data(physeq_names))
df$LibrarySize <- sample_sums(physeq_names)
df <- df[order(df$LibrarySize),]
df$Index <- seq(nrow(df))
#
ggplot(data=df, aes(x=Index, y=LibrarySize, fill=Sample_or_Control, shape=LOCATION)) +
geom_point(color="black", size=3, aes(shape=LOCATION)) +
scale_shape_manual(values = c(21,22,23)) +
theme_bw() > Shows that out of the 3 ship blanks I have, one of the sames has a pretty large library size, otherwise, control samples have very small library sizes.
# Assign negative control designation
sample_data(physeq_names)$is.neg <- sample_data(physeq_names)$Sample_or_Control == "Control Sample"
# ID contaminants using Prevalence information
contamdf.prev <- isContaminant(physeq_names, method="prevalence", neg="is.neg", threshold = 0.5, normalize = TRUE)
table(contamdf.prev$contaminant) # Report number of ASVs IDed as contamintants##
## FALSE TRUE
## 9141 34
0.5 - this threshold will ID contaminants in all samples that are more prevalent in negative controls than in positive samples. In this study, control samples included 1 lab-based blank and 3 ship-board blanks taken at the time of field study. Results showed 34 ASVs to be considered “contaminants”
# Make phyloseq object of presence-absence in negative controls and true samples
## change to presence absence
gr.pa <- transform_sample_counts(physeq_names, function(abund) 1*(abund>0))
# isolate PA of positive and negative samples
gr.pa.neg <- prune_samples(sample_data(gr.pa)$Sample_or_Control == "Control Sample", gr.pa)
gr.pa.pos <- prune_samples(sample_data(gr.pa)$Sample_or_Control == "True Sample", gr.pa)# Subset TRUE contaminants
contams <- subset(contamdf.prev, contaminant == "TRUE")
contams$Feature.ID <- row.names(contams)
# head(contams);dim(contams)
list_of_contams <- as.character(contams$Feature.ID)
#
# Explore taxa IDed as contaminants
taxa_list <- as.data.frame(taxmat)
taxa_list$Feature.ID <- row.names(taxa_list)
taxa_contams <- left_join(contams, taxa_list)
# write_delim(taxa_contams, path = "List-of-contaminant-ASVs.txt", delim = "\t")
# Plot total sequences and which are contaminants
# Remove contaminant and count sequence sums per sample to see which samples had the highest number of contamiant sequences removed.
# After remove contaminants, what % of sequences is removed?
# head(GR_tagseq_counts[1:2,])
GR_tagseq_longformat$CONTAM <- "Pass"
# head(contams[1:2,])
# str(list_of_contams)
GR_tagseq_longformat$CONTAM[GR_tagseq_longformat$Feature.ID %in% list_of_contams] = "Fail"
# head(GR_tagseq_counts[1:2,])
# Make character list of all feature.ids to KEEP:
keep1<- subset(GR_tagseq_longformat, CONTAM %in% "Pass")
# length(unique(keep1$Feature.ID))
keep_asvs <- as.character(unique(keep1$Feature.ID)) #see below
#
passfail <- GR_tagseq_longformat %>%
group_by(SAMPLE, CONTAM) %>%
summarise(SUM_CONTAM = sum(COUNT)) %>%
data.frame## [1] 1569829
## [1] 9175
# unique(GR_tagseq_counts$SAMPLEID)
GR_tagseq_counts_noCTRL <- subset(GR_tagseq_longformat, !(SAMPLEID %in% "CTRL"))
# New total number of sequences
sum(GR_tagseq_counts_noCTRL$COUNT)## [1] 1479273
counts_decont <- subset(GR_tagseq_longformat, !(Feature.ID %in% list_of_contams))
length(unique(counts_decont$Feature.ID)) - length(unique(GR_tagseq_longformat$Feature.ID)) # Confirm 34 lines removed## [1] -34
# % of sequences was removed following decontam; this is counting the ship blank samples themselves
100*(1-(sum(counts_decont$COUNT)/sum(GR_tagseq_counts_noCTRL$COUNT)))## [1] 1.23581
## Using SUM_CONTAM as value column: use value.var to override.
passfail_wide$PercLossSeq <- paste(100*(passfail_wide$Fail/(passfail_wide$Fail+passfail_wide$Pass)))
# dim(passfail_wide)
# write.csv(passfail_wide, file="PercSeqLost-decontam.csv")
# breakdown by sample - reports % lost per sample
# Remove contaminant sequences from phyloseq object:
# Subset TRUE contaminants
# ?prune_taxa
# class(keep_asvs)
physeq_tmp <- prune_taxa(keep_asvs, physeq_names)
# sample_data(physeq_tmp)
# Remove one sample with too few sequences
physeq_clean <- subset_samples(physeq_tmp, sample_names(physeq_tmp) !="GordaRidge_BSW020_sterivex_2019_REPa")
# sample_data(physeq_clean)
# physeq_clean
# Remove control samples from data frame
tmp <- subset(GR_tagseq_longformat, !(SAMPLEID %in% "CTRL")) # Remove controls, get list of sample names that are controls
samples_keep <- as.character(unique(tmp$SAMPLE))
physeq_clean_true <- prune_samples(samples_keep, physeq_clean)
# Save as R Data
save(counts_decont, file="data-input/GR-ASV-table-clean-19-08-2020.RData")Import cleaned ASV data, curate taxonomic assignments specific to protists.
## Loading objects:
## counts_decont
gr_counts <- counts_decont %>%
filter(COUNT > 0) %>%
separate(Taxon_updated, c("Kingdom","Supergroup","Division","Class","Order","Family","Genus","Species"), sep = ";", remove = FALSE) %>%
data.frame
# head(gr_counts)
tax_only_tmp <- gr_counts %>%
select(Taxon_updated, Kingdom,Supergroup,Division,Class,Order,Family,Genus,Species) %>%
distinct() %>%
data.frameventnames <- read.delim("data-input/ventnames-gordaridge.txt")
colnames(ventnames)[1]<-"SAMPLE"
# Join with dataframe
gr_counts_name <- gr_counts %>%
left_join(select(ventnames, SAMPLE, LOCATION_SPECIFIC, Sampletype, LocationName)) %>%
data.frame## Joining, by = "SAMPLE"
#ventnames[c(1,3,5:6)]
gr_counts_name$LocationName[gr_counts_name$LOCATION == "Shipblank"]="Shipblank"pr2_curate <- function(df){
# Add a column
df$Taxa <-"Unassigned-Eukaryote"
df$Taxa[df$Supergroup == "Alveolata"]="Alveolata-Other"
df$Taxa[df$Division == "Ciliophora"]="Alveolata-Ciliates"
df$Taxa[df$Division == "Dinoflagellata"]="Alveolata-Dinoflagellates"
df$Taxa[df$Class == "Syndiniales"] = "Alveolata-Syndiniales"
df$Taxa[df$Class == "Apicomplexa"]="Alveolata-Apicomplexa"
df$Taxa[df$Supergroup == "Hacrobia"]="Hacrobia-Other"
df$Taxa[df$Division == "Cryptophyta"]="Hacrobia-Cryptophyta"
df$Taxa[df$Division == "Haptophyta"]="Hacrobia-Haptophyta"
df$Taxa[df$Supergroup == "Opisthokonta"]="Opisthokonta-Other"
df$Taxa[df$Division == "Fungi"]="Opisthokonta-Fungi"
df$Taxa[df$Division == "Metazoa"]="Opisthokonta-Metazoa"
df$Taxa[df$Supergroup == "Stramenopiles"]="Stramenopiles-Other"
df$Taxa[df$Division == "Ochrophyta"]="Stramenopiles-Ochrophyta"
mast <- unique(filter(df, grepl("MAST", Class)) %>% select(Class))
mast_list <- as.character(mast$Class)
df$Taxa[df$Class %in% mast_list]="Stramenopiles-MAST"
df$Taxa[df$Supergroup == "Archaeplastida"]="Archaeplastida-Other"
df$Taxa[df$Division == "Chlorophyta"]="Archaeplastida-Chlorophyta"
df$Taxa[df$Supergroup == "Excavata"]="Excavata"
# df$Taxa[df$Division == "Metamonada"]="Excavata-Metamonada"
df$Taxa[df$Supergroup == "Amoebozoa"]="Amoebozoa"
# df$Taxa[df$Division == "Lobosa"]="Amoebozoa-Lobosa"
df$Taxa[df$Supergroup == "Rhizaria"]="Rhizaria-Other"
df$Taxa[df$Division == "Cercozoa"]="Rhizaria-Cercozoa"
df$Taxa[df$Division == "Radiolaria"]="Rhizaria-Radiolaria"
return(df)
}gr_counts_wtax <- pr2_curate(gr_counts_name)
# head(gr_counts_wtax[1:3,])
# unique(gr_counts_wtax$Taxa)Output is the full ASV table with added columns for curated taxonomy. Above also provides a list of the unique taxonomic names assigned.
gr_counts_wtax_samplesonly <- subset(gr_counts_wtax, !(Sampletype == "control"))
## To average across replicates, modify SUPR sample names
gr_counts_filter <- gr_counts_wtax_samplesonly
gr_counts_filter$SAMPLEID<- sub("SUPRS9", "SUPR", gr_counts_filter$SAMPLEID)
gr_counts_filter$SAMPLEID<- sub("SUPRS11", "SUPR", gr_counts_filter$SAMPLEID)
gr_counts_filter$SAMPLEID<- sub("SUPRS10", "SUPR", gr_counts_filter$SAMPLEID)
gr_counts_filter$SAMPLEID<- sub("SUPRS2", "SUPR", gr_counts_filter$SAMPLEID)
gr_counts_filter$SAMPLEID<- sub("SUPRS3", "SUPR", gr_counts_filter$SAMPLEID)
gr_counts_filter$SAMPLEID<- sub("SUPRS1", "SUPR", gr_counts_filter$SAMPLEID)## [1] "Alveolata-Ciliates" "Unassigned-Eukaryote"
## [3] "Opisthokonta-Metazoa" "Archaeplastida-Other"
## [5] "Opisthokonta-Fungi" "Stramenopiles-MAST"
## [7] "Stramenopiles-Ochrophyta" "Alveolata-Dinoflagellates"
## [9] "Alveolata-Syndiniales" "Rhizaria-Radiolaria"
## [11] "Hacrobia-Haptophyta" "Opisthokonta-Other"
## [13] "Archaeplastida-Chlorophyta" "Rhizaria-Cercozoa"
## [15] "Amoebozoa" "Hacrobia-Other"
## [17] "Stramenopiles-Other" "Hacrobia-Cryptophyta"
## [19] "Alveolata-Other" "Rhizaria-Other"
## [21] "Excavata"
# Sum of all sequences
a <- sum(gr_counts_filter %>% filter(!(SAMPLEID == "CTRL")) %>% select(COUNT)); a## [1] 1434482
# Total ASVs
dim(unique(gr_counts_filter %>% filter(!(SAMPLEID == "CTRL")) %>% select(Feature.ID)))[1]## [1] 9028
# Percentage of all sequences Unassigned Eukaryote
x <- sum(gr_counts_filter %>% filter(!(SAMPLEID == "CTRL")) %>% filter(Taxon_updated == "Eukaryota") %>% select(COUNT))
100*(x/a)## [1] 2.823876
# Total ASVs left "Unassigned-Eukaryote"
dim(unique(gr_counts_filter %>% filter(!(SAMPLEID == "CTRL")) %>% filter(Taxon_updated == "Eukaryota") %>% select(Feature.ID)))[1]## [1] 1058
# Percentage of all sequences assigned Opisthokonts
x <- sum(gr_counts_filter %>% filter(!(SAMPLEID == "CTRL")) %>% filter(Supergroup == "Opisthokonta") %>% select(COUNT))
100*(x/a)## [1] 12.92606
dim(unique(gr_counts_filter %>% filter(!(SAMPLEID == "CTRL")) %>% filter(Supergroup == "Opisthokonta") %>% select(Feature.ID)))[1]## [1] 615
Average ASV sequence counts across replicate samples, COUNT_AVG column will now equal the ASV sequence count value across replicates
gr_counts_avg_wtax <- gr_counts_filter %>%
group_by(Feature.ID, SAMPLEID, Sampletype, LOCATION_SPECIFIC, LocationName, Taxon_updated, Kingdom, Supergroup, Division, Class, Order, Family, Genus, Species, Taxa) %>%
summarise(COUNT_AVG = mean(COUNT)) %>%
as.data.frame
# dim(gr_counts_filter);dim(gr_counts_avg_wtax)
# Save output and view
# save(gr_counts_filter,gr_counts_wtax, gr_counts_avg_wtax, file="data-input/GordaRidge-ASVtable-avg-19-08-2020.RData")
# tmp <- gr_counts_avg_wtax %>% select(Taxa, Taxon_updated, Kingdom, Supergroup, Division, Class, Order, Family, Genus, Species) %>% distinct() %>% data.frame
# write_delim(tmp, path = "tax-tmp-2.txt", delim = "\t")Now sum ASV counts by curated taxonomic level
gr_counts_avg_TAXA <- gr_counts_avg_wtax %>%
# sum by like taxa
group_by(SAMPLEID, Sampletype, LocationName, Taxa) %>%
summarise(SUM = sum(COUNT_AVG)) %>%
unite(SAMPLE, LocationName, Sampletype, SAMPLEID, sep = " ", remove = FALSE) %>%
data.frame
# View(gr_counts_avg_TAXA)
# head(gr_counts_avg_TAXA)
supp_table <- gr_counts_avg_TAXA %>%
pivot_wider(names_from = Taxa, values_from = SUM, values_fill = 0)
write_delim(supp_table, path = "Suppl-18S-relabun.txt", delim = "\t")# unique(gr_counts_avg_TAXA$Taxa)
level2ORDER <- c("Alveolata-Ciliates","Alveolata-Dinoflagellates","Alveolata-Syndiniales","Alveolata-Other","Rhizaria-Cercozoa","Rhizaria-Radiolaria","Rhizaria-Other","Stramenopiles-MAST","Stramenopiles-Ochrophyta","Stramenopiles-Other","Hacrobia-Cryptophyta","Hacrobia-Haptophyta","Hacrobia-Other","Amoebozoa","Excavata","Archaeplastida-Chlorophyta","Archaeplastida-Other","Opisthokonta-Fungi","Opisthokonta-Metazoa","Opisthokonta-Other","Unassigned-Eukaryote")
level2color <- c("#f1eef6","#d7b5d8","#df65b0","#ce1256","#fc9272","#ef3b2c","#800026","#fff7bc","#fec44f","#d95f0e","#74c476","#238b45","#00441b","#7fcdbb","#084081","#2b8cbe","#016c59","#bcbddc","#807dba","#54278f","#bdbdbd")
gr_counts_avg_TAXA$LEVEL2ORDER <- factor(gr_counts_avg_TAXA$Taxa, levels=level2ORDER)
names(level2color)<-level2ORDER
sample_order_all<-c("Shallow seawater in situ sterivex","Deep seawater in situ sterivex","Mt Edwards Plume in situ sterivex","Mt Edwards Plume Grazing T0","Mt Edwards Plume Grazing T24","Mt Edwards Plume Grazing T36","Mt Edwards Vent in situ SUPR","Mt Edwards Vent Grazing T0","Mt Edwards Vent Grazing T36","Venti Latte Vent in situ SUPR","Venti Latte Vent Grazing T0","Venti Latte Vent Grazing T36","Candelabra Vent in situ SUPR","Candelabra Vent Grazing T24","SirVentsAlot Vent in situ SUPR","SirVentsAlot Vent Grazing T24")
sample_name_all<-c("Shallow seawater","Deep seawater","Mt Edwards Plume","Mt Edwards Plume T0","Mt Edwards Plume T23","Mt Edwards Plume T35","Mt Edwards Vent","Mt Edwards Vent T0","Mt Edwards Vent T36","Venti Latte Vent","Venti Latte Vent T0","Venti Latte Vent T29","Candelabra Vent","Candelabra Vent T26","SirVentsAlot Vent","SirVentsAlot Vent T24")
gr_counts_avg_TAXA$SAMPLE_ORDER <- factor(gr_counts_avg_TAXA$SAMPLE, levels = sample_order_all, labels = sample_name_all)
exporder <- c("sterivex", "SUPR", "T0", "T24", "T36")
gr_counts_avg_TAXA$SAMPLEID_ORDER <- factor(gr_counts_avg_TAXA$SAMPLEID, levels = exporder)
gr_counts_avg_TAXA$LOCATION_ORDER <- factor(gr_counts_avg_TAXA$LocationName, levels=c("Shallow seawater", "Deep seawater", "Mt Edwards Plume", "Mt Edwards Vent", "Venti Latte Vent", "Candelabra Vent", "SirVentsAlot Vent"))
# head(gr_counts_avg_TAXA)barplot_lev2 <- function(df){
ggplot(df, aes(x=SAMPLE_ORDER, y=SUM, fill=LEVEL2ORDER)) +
geom_bar(stat="identity", position="fill", color="black") +
scale_fill_manual(values=level2color) +
scale_y_continuous(expand = c(0,0))+
theme(legend.position = "right",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x = element_text(angle=90, hjust=1, vjust=0.5, color = "black", face="bold", size = 12),
axis.text.y = element_text(color="black", face="bold", size = 12),
strip.text = element_blank(),
legend.title = element_blank()) +
labs(x="", y="Relative abundance")+
facet_grid(.~LOCATION_ORDER, space = "free", scales = "free")
}insitu <- c("sterivex", "SUPR")
rm <- c("Unassigned", "Opisthokonta-Other", "Opisthokonta-Fungi", "Opisthokonta-Metazoa")
# svg("GR-insitu-tax.svg", w = 8, h = 8)
barplot_lev2(filter(gr_counts_avg_TAXA, (SAMPLEID %in% insitu & !(Taxa %in% rm))))# options(repr.plot.width = 14, repr.plot.height = 8)
insitu <- c("sterivex", "SUPR")
rm <- c("Unassigned", "Opisthokonta-Other", "Opisthokonta-Fungi", "Opisthokonta-Metazoa")
# svg("GR-all-tax-supplementary.svg", w = 20, h = 8)
plot_grid(
barplot_lev2(filter(gr_counts_avg_TAXA, !(Taxa %in% rm))) + theme(legend.position = "none"),
barplot_lev2(filter(gr_counts_avg_TAXA)),
ncol = 2, rel_widths = c(0.70,1), labels = c("a", "b"))# Import metadata for 16S
ventnames_16 <- read.delim("data-input/ventnames-gordaridge-16S.txt")
ventnames_16_mod <- ventnames_16 %>%
mutate(location = case_when(
grepl("Plume", SAMPLE_AMY) ~ "Plume",
grepl("BSW", SAMPLE_AMY) ~ "BSW",
grepl("Vent", LocationName) ~ "Vent"),
NA_NUM = SAMPLEID_16S) %>%
mutate(NA_NUM = str_replace(NA_NUM, "NA108", "")) %>%
mutate(NA_NUM = str_replace(NA_NUM, "NA080", "")) %>%
mutate(NA_NUM = str_replace(NA_NUM, "aSTEP", "")) %>%
mutate(NA_NUM = str_replace(NA_NUM, "bSTEP", "")) %>%
mutate(NA_NUM = str_replace(NA_NUM, "STEP20200226", "")) %>%
mutate(NA_NUM = str_replace(NA_NUM, "STEP", "")) %>%
unite(NEW_SAMPLEID, location, NA_NUM, sep ="") %>%
data.frame
countbac <- read.delim("data-input/CountTable-wtax-16s-plus3-2020-06-23.txt")
# Remove samples that were repeated
rm <- c("NA108003STEP", "NA108039STEP", "NA108087STEP")
# Include only samples that also appear int he 18S dataset
x <- as.character(unique(ventnames$LocationName))tmp <- countbac %>%
select(-all_of(rm)) %>%
pivot_longer(starts_with("NA"), names_to = "SAMPLEID_16S") %>%
left_join(ventnames_16_mod) %>%
filter(LocationName %in% x) %>%
data.frame## Joining, by = "SAMPLEID_16S"
## [1] 1090141
## [1] 6532
bac_df_plot <- countbac %>%
separate(Taxon, sep = ";D_[[:digit:]]__", into = c("Domain", "Phylum", "Class", "Order", "Family", "Genus", "Species"), remove = TRUE, extra = "merge") %>% # Warnings are OK with NAs
mutate_if(is.character, str_replace_all, pattern = "D_0__", replacement = "") %>%
filter(Domain %in% "Archaea" | Domain %in% "Bacteria") %>% #Select only archaea and bacteria, removing unassigned
select(-all_of(rm)) %>% # Remove samples we are replacing
pivot_longer(starts_with("NA"), names_to = "SAMPLEID_16S") %>%
left_join(ventnames_16_mod) %>%
filter(LocationName %in% x) %>%
data.frame## [1] 1089209
## [1] 6497
bac_df_filtered <- bac_df_plot %>%
ungroup() %>%
mutate(TOTALSEQ = sum(value)) %>%
group_by(Feature.ID) %>%
summarise(SUM = sum(value),
RELABUN = SUM/TOTALSEQ) %>%
filter(RELABUN >= 0.001) %>%
add_column(KEEP = "YES") %>%
right_join(bac_df_plot) %>%
filter(KEEP == "YES") %>%
data.frame## `summarise()` regrouping output by 'Feature.ID' (override with `.groups` argument)
## Joining, by = "Feature.ID"
# colnames(bac_df_plot)
# head(bac_df_plot)
# ASVs that are within the top 0.1% abundance
top16s_asvs <- as.character(unique(bac_df_filtered$Feature.ID))
# Subset a dataframe with only ASVs
asv_curated <- bac_df_plot %>%
select(Feature.ID, Domain, Phylum, Class, Order, Family, Genus, Species) %>%
distinct() %>% data.frame
# head(asv_curated)
# head(bac_df_plot)Here we are curating the 16S taxonomy assignments so we can get an informative look at the in situ bacteria population diversity and biogeography.
# Add a column for updated taxonomy name
curate_16s_tax <- function(df, THRESHOLD){
# List the class and genus level designations that should be named at class level
class <- c("Alphaproteobacteria", "Deltaproteobacteria", "Gammaproteobacteria", "Nitrososphaeria", "Thermoplasmata")
genus <- c("Arcobacter","Campylobacter","Hydrogenimonas","Nitratiruptor","Nitratifractor","Sulfurovum","Sulfurimonas","Caminibacter", "Psychrilyobacter", "SUP05 cluster")
# List the appropriate taxonomic names for this whole level to be placed into "Other" category
class_other <- c("Verrucomicrobiae")
phylum_other <- c("Planctomycetes", "Poribacteria", "Cyanobacteria", "WPS-2")
order_other <- c("Synechococcales")
totalsumseq <- sum(df$value) # total number of sequences
tmp_filter <- df %>%
group_by(Feature.ID) %>%
summarise(SUM = sum(value)) %>%
mutate(RELABUN = 100*(SUM/totalsumseq)) %>%
filter(RELABUN >= THRESHOLD) %>% #User-defined relabun threshold
data.frame
keep_asvs_relabun <- as.character(unique(tmp_filter$Feature.ID))
df2 <- df %>%
mutate(Tax_update = Phylum) %>% # Default to filling new taxa level to phylum
mutate(Tax_update = ifelse(Feature.ID %in% keep_asvs_relabun, Tax_update, "Other"), # Change name to other if it falls below relative abundance Threshold
Tax_update = ifelse(Class %in% class, paste(Phylum, Class, sep = "-"), Tax_update),
Tax_update = ifelse(Order == "Methylococcales", paste(Phylum, "Methylococcales", sep = "-"), Tax_update),
Tax_update = ifelse(Order == "Oceanospirillales", paste(Phylum, "Oceanospirillales", sep = "-"), Tax_update),
Tax_update = ifelse(Order == "Thioglobaceae", paste(Phylum, "Thioglobaceae", sep = "-"), Tax_update),
Tax_update = ifelse(Family == "Nitrospinaceae", paste(Phylum, "Nitrospinaceae", sep = "-"), Tax_update),
Tax_update = ifelse(Class %in% class_other, "Other", Tax_update),
Tax_update = ifelse(Phylum %in% phylum_other, "Other", Tax_update),
Tax_update = ifelse(Order %in% order_other, "Other", Tax_update),
Tax_update = ifelse(Genus %in% genus, paste(Phylum, Genus, sep = "-"), Tax_update))
return(df2)
}# head(bac_df_plot) # Add updated taxa list to this dataframe
# unique(bac_df_plot$LocationName)
bac_wcuratedtax <- curate_16s_tax(bac_df_plot %>%
filter(!(Genus == "Ralstonia")), 0.1) #Will place ASVs <0.1% abundance into "Other category"## `summarise()` ungrouping output (override with `.groups` argument)
# update exisiting taxonomy
bac_wcuratedtax_toplot <- bac_wcuratedtax %>%
# left_join(asv_curated_updated) %>%
# filter(!(Genus == "Ralstonia")) %>%
# Average ASV seq count across replicates
group_by(Feature.ID, LocationName, Tax_update) %>%
summarise(AVG_count = mean(value)) %>%
ungroup() %>%
group_by(LocationName, Tax_update) %>%
summarise(SUM_bytax = sum(AVG_count)) %>%
data.frame
# unique(bac_wcuratedtax$Tax_update)
unique(bac_wcuratedtax_toplot$LocationName)## [1] Candelabra Vent Deep seawater Mt Edwards Plume Mt Edwards Vent
## [5] Shallow seawater SirVentsAlot Vent Venti Latte Vent
## 10 Levels: Blue Ciliate Candelabra Vent Deep seawater ... Venti Latte Vent
bac_wcuratedtax_toplot$LOCATION <- factor(bac_wcuratedtax_toplot$LocationName, levels = c("Shallow seawater", "Deep seawater", "Mt Edwards Plume", "Mt Edwards Vent", "Venti Latte Vent", "SirVentsAlot Vent", "Candelabra Vent"))
#
# bac_wtax <- bac_df_plot %>%
# left_join(asv_curated_updated) %>%
# filter(!(Genus == "Ralstonia")) %>%
# # Average ASV seq count across replicates
# group_by(Feature.ID, LocationName, Tax_update) %>%
# summarise(AVG_count = mean(value)) %>%
# data.frame
# write_delim(bac_wtax, path = "bac-wtaxassigned-20-08-2020.txt", delim = "\t")tax_color<-c("#a50026","#d73027","#f46d43","#fdae61","#fee090","#ffffbf","#40004b","#762a83","#9970ab","#c2a5cf","#e7d4e8","#d9f0d3","#a6dba0","#5aae61","#1b7837","#00441b","#e0f3f8","#abd9e9","#74add1","#4575b4","#313695","#8e0152","#c51b7d","#de77ae","#f1b6da","#fde0ef","#e6f5d0","#b8e186","#7fbc41","#4d9221","#276419","#bababa","#878787","#4d4d4d","#1a1a1a")
tax_order <- c("Epsilonbacteraeota-Arcobacter","Epsilonbacteraeota-Caminibacter","Epsilonbacteraeota-Campylobacter","Epsilonbacteraeota-Hydrogenimonas","Epsilonbacteraeota-Nitratifractor","Epsilonbacteraeota-Nitratiruptor","Epsilonbacteraeota-Sulfurimonas","Epsilonbacteraeota-Sulfurovum","Proteobacteria-Alphaproteobacteria","Proteobacteria-Deltaproteobacteria","Proteobacteria-Gammaproteobacteria","Proteobacteria-Methylococcales","Proteobacteria-Oceanospirillales","Proteobacteria-SUP05 cluster","Acidobacteria","Actinobacteria","Aquificae","Bacteroidetes","Chloroflexi","Thaumarchaeota-Nitrososphaeria","Euryarchaeota-Thermoplasmata","Fusobacteria-Psychrilyobacter","Marinimicrobia (SAR406 clade)","Nitrospinae-Nitrospinaceae","Other")
bac_wcuratedtax_toplot$TAX_ORDER <- factor(bac_wcuratedtax_toplot$Tax_update, levels = tax_order)
barplot_16s <- function(df){
ggplot(df, aes(x = LOCATION, y = SUM_bytax, fill = TAX_ORDER)) +
geom_bar(stat = "identity", position = "fill", color = "black") +
scale_fill_manual(values = tax_color) +
scale_y_continuous(expand = c(0,0)) +
theme(legend.position = "right",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x = element_text(angle = 90, hjust=1, vjust=0.5, color = "black", face="bold", size = 12),
axis.text.y = element_text(color = "black", face="bold", size = 12),
strip.text = element_blank(),
legend.title = element_blank()) +
labs(x="", y="Relative abundance") +
facet_grid(.~LOCATION, space = "free", scales = "free") +
guides(fill=guide_legend(ncol=1))
}euk_key <- get_legend(barplot_lev2(filter(gr_counts_avg_TAXA, (SAMPLEID %in% insitu & !(Taxa %in% rm)))))
prok_key <- get_legend(barplot_16s(bac_wcuratedtax_toplot))
# svg("panel-18s-16s-barplot.svg", w=20, h = 10)
plot_grid(
barplot_lev2(filter(gr_counts_avg_TAXA, (SAMPLEID %in% insitu & !(Taxa %in% rm)))) + theme(legend.position = "none"),
euk_key,
barplot_16s(bac_wcuratedtax_toplot) + theme(legend.position = "none"),
prok_key,
rel_widths = c(1,0.5,1,0.5),
align = ("v"),
labels = c("a.", "", "b.", ""),
ncol = 4)## Warning: Graphs cannot be vertically aligned unless the axis parameter is set.
## Placing graphs unaligned.
## Feature.ID Domain Phylum Class
## 1 139ad7417ef0dfb630699faea2eb5e06 Archaea Thaumarchaeota Nitrososphaeria
## 2 139ad7417ef0dfb630699faea2eb5e06 Archaea Thaumarchaeota Nitrososphaeria
## 3 139ad7417ef0dfb630699faea2eb5e06 Archaea Thaumarchaeota Nitrososphaeria
## 4 139ad7417ef0dfb630699faea2eb5e06 Archaea Thaumarchaeota Nitrososphaeria
## 5 139ad7417ef0dfb630699faea2eb5e06 Archaea Thaumarchaeota Nitrososphaeria
## 6 139ad7417ef0dfb630699faea2eb5e06 Archaea Thaumarchaeota Nitrososphaeria
## Order Family Genus
## 1 Nitrosopumilales Nitrosopumilaceae uncultured marine archaeon
## 2 Nitrosopumilales Nitrosopumilaceae uncultured marine archaeon
## 3 Nitrosopumilales Nitrosopumilaceae uncultured marine archaeon
## 4 Nitrosopumilales Nitrosopumilaceae uncultured marine archaeon
## 5 Nitrosopumilales Nitrosopumilaceae uncultured marine archaeon
## 6 Nitrosopumilales Nitrosopumilaceae uncultured marine archaeon
## Species Confidence SAMPLEID_16S value
## 1 uncultured marine archaeon 0.86145 NA108001aSTEP 1179
## 2 uncultured marine archaeon 0.86145 NA108001bSTEP 2971
## 3 uncultured marine archaeon 0.86145 NA108003STEP20200226 6931
## 4 uncultured marine archaeon 0.86145 NA108009STEP 0
## 5 uncultured marine archaeon 0.86145 NA108010STEP 155
## 6 uncultured marine archaeon 0.86145 NA108012STEP 677
## SAMPLE_AMY Sampletype SAMPLEID LocationName STATUS
## 1 Niskin Plume Mt. Edwards in situ sterivex Mt Edwards Plume keep
## 2 Niskin Plume Mt. Edwards in situ sterivex Mt Edwards Plume keep
## 3 Niskin BSW in situ sterivex Deep seawater keep
## 4 SUPR Filter Mt. Edwards in situ SUPR Mt Edwards Vent keep
## 5 SUPR Filter Mt. Edwards in situ SUPR Mt Edwards Vent keep
## 6 SUPR BAG Mt. Edwards in situ SUPR Mt Edwards Vent keep
## FLUIDORIGIN NEW_SAMPLEID Tax_update
## 1 Niskin7 Plume001 Thaumarchaeota-Nitrososphaeria
## 2 Niskin7 Plume001 Thaumarchaeota-Nitrososphaeria
## 3 Niskin10 BSW003 Thaumarchaeota-Nitrososphaeria
## 4 S1 Vent009 Thaumarchaeota-Nitrososphaeria
## 5 S2 Vent010 Thaumarchaeota-Nitrososphaeria
## 6 S7 Vent012 Thaumarchaeota-Nitrososphaeria
bac_wcuratedtax$LOCATION <- factor(bac_wcuratedtax$LocationName, levels = c("Shallow seawater", "Deep seawater", "Mt Edwards Plume", "Mt Edwards Vent", "Venti Latte Vent", "SirVentsAlot Vent", "Candelabra Vent"))
relabun_16s <- bac_wcuratedtax %>%
#Sum by sample
group_by(LOCATION) %>%
mutate(total = sum(value)) %>%
mutate(RelAbun = 100*(value/total)) %>%
data.frame
# # head(relabun_16s)# Interactive plot
relabun_16s %>% plot_ly(x = ~RelAbun,
y = ~LOCATION,
color = ~Tax_update,
name = ~Tax_update,
type = 'bar',
orientation = 'h',
colors = tax_color,
text = ~Tax_update) %>%
layout(barmode = "stack")## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
load("data-input/GordaRidge-ASVtable-avg-19-08-2020.RData")
library(reshape2);
library(vegan);
library(dplyr)
library(ade4);
library(compositions);
library(tidyverse)
library(purrr)
library(cluster)
library(RColorBrewer)
library(ape)# Remove controls
gr_counts_allsamples <- subset(counts_decont, !(SAMPLEID == "CTRL"))
gr_nums <- gr_counts_allsamples[, c("Feature.ID", "SAMPLE", "COUNT")]
tax <- gr_counts_allsamples[, c("Feature.ID", "Taxon_updated")]
# unique(gr_nums$SAMPLE)
gr_nums_filter <- subset(gr_nums, !(SAMPLE == "GordaRidge_BSW020_sterivex_2019_REPa"))
gr_nums_wide <- dcast(gr_nums_filter, Feature.ID~SAMPLE, fill=0)## Using COUNT as value column: use value.var to override.
# Jaccard distance
jac_gr_mat <- vegdist(t(gr_nums_wide), method = "jaccard")
# PCoA
pcoa_jac_gr <- pcoa(jac_gr_mat)Jaccard distance transformation was done on the raw sequences reads. Below is a diagnostic scree plot to determine the appropriate number of axes.
# Extract variances from pcoa, from jaccard calculated dist. metric
jac_var_gr <- data.frame(pcoa_jac_gr$values$Relative_eig) %>%
select(PercVar = 'pcoa_jac_gr.values.Relative_eig') %>%
rownames_to_column(var = "PCaxis") %>%
data.frame
head(jac_var_gr[1:4,]) #Look at first 4 axes## PCaxis PercVar
## 1 1 0.11270615
## 2 2 0.08524318
## 3 3 0.07306333
## 4 4 0.06505124
# Screeplot check
ggplot(jac_var_gr, aes(x = as.numeric(PCaxis), y = PercVar)) +
geom_bar(stat = "identity", fill = "grey", color = "black") +
theme_minimal() +
theme(axis.title = element_text(color = "black", face = "bold", size = 10),
axis.text.y = element_text(color = "black", face = "bold"),
axis.text.x = element_blank()) +
labs(x = "PC axis", y = "% Variance", title = "Log-Ratio PCoA Screeplot")## Run 0 stress 0.1774623
## Run 1 stress 0.2193465
## Run 2 stress 0.1755825
## ... New best solution
## ... Procrustes: rmse 0.124748 max resid 0.4205996
## Run 3 stress 0.1775817
## Run 4 stress 0.2148195
## Run 5 stress 0.1774623
## Run 6 stress 0.1775817
## Run 7 stress 0.1775817
## Run 8 stress 0.207624
## Run 9 stress 0.1775817
## Run 10 stress 0.1774623
## Run 11 stress 0.2138939
## Run 12 stress 0.1755826
## ... Procrustes: rmse 8.847588e-05 max resid 0.0003193724
## ... Similar to previous best
## Run 13 stress 0.1755825
## ... Procrustes: rmse 3.223444e-05 max resid 9.831384e-05
## ... Similar to previous best
## Run 14 stress 0.2482208
## Run 15 stress 0.1667834
## ... New best solution
## ... Procrustes: rmse 0.1387968 max resid 0.405167
## Run 16 stress 0.1667834
## ... New best solution
## ... Procrustes: rmse 5.706342e-05 max resid 0.0001853986
## ... Similar to previous best
## Run 17 stress 0.1636427
## ... New best solution
## ... Procrustes: rmse 0.1173622 max resid 0.4152786
## Run 18 stress 0.1878499
## Run 19 stress 0.1878508
## Run 20 stress 0.2172173
## *** No convergence -- monoMDS stopping criteria:
## 20: stress ratio > sratmax
## [1] 0.1636427
ventnames <- read.delim("data-input/ventnames-gordaridge.txt")
# names(ventnames);head(ventnames)
colnames(ventnames)[1]<-"SAMPLE"
jac_nmds_df <- data.frame(jac_nmds_gr$points) %>%
rownames_to_column(var = "SAMPLE") %>%
separate(SAMPLE, into = c("DATASET", "LOCATION_SPECIFIC", "SAMPLEID", "YEAR", "REP"), sep = "_", remove = FALSE) %>%
left_join(ventnames) %>%
data.frame## Warning: Expected 5 pieces. Missing pieces filled with `NA` in 16 rows [2, 10,
## 11, 12, 13, 17, 18, 19, 22, 23, 24, 25, 26, 27, 28, 29].
## Joining, by = c("SAMPLE", "LOCATION_SPECIFIC", "SAMPLEID")
# Factoring
sample_order <- c('Mt Edwards Plume','Mt Edwards Vent','Candelabra Vent','SirVentsAlot Vent','Venti Latte Vent','Deep seawater', 'Shallow seawater')
sample_color <-c("lightblue","#377eb8", "#e41a1c", "#4daf4a", "#984ea3", "orange", "yellow")
jac_nmds_df$SAMPLE_ORDER <- factor(jac_nmds_df$LocationName, levels = rev(sample_order))
names(sample_color)<-sample_order
# head(jac_nmds_df)library(ggrepel)
# options(repr.plot.width = 8, repr.plot.height = 6)
# svg("PCA-wlabels-gr-22-07-2020.svg", w=8, h=8)
nmds_jac_18s <- ggplot(jac_nmds_df, aes(x = MDS1, y = MDS2,
fill = LocationName,
shape = Sampletype,
label = LOCATION_SPECIFIC)) + #Replace label=SAMPLEID.y
geom_hline(yintercept = 0) + geom_vline(xintercept = 0) +
geom_point(aes(x = MDS1, y = MDS2, fill = LocationName), size = 5) +
scale_fill_manual(values = sample_color) +
# labs(x = "NMDS 1", y = "NMDS 2", title = "Jaccard Distance NMDS") +
labs(x = "NMDS 1", y = "NMDS 2", title = "") +
scale_shape_manual(values = c(23,21))+
theme_bw()+
geom_text_repel()+
theme(axis.text = element_text(color="black", size=12),
legend.title = element_blank())+
guides(fill = guide_legend(override.aes=list(shape=21)))
nmds_jac_18s # dev.off()# # Transform data - with CLR
# # Log-ratio
log_rats<-data.frame(compositions::clr(t(gr_nums_wide)))
# # look at eigenvalues
pca_lr <- prcomp(log_rats)
variance_lr <- (pca_lr$sdev^2)/sum(pca_lr$sdev^2)
# head(variance_lr)
barplot(variance_lr,
main='Log-Ratio PCA Screeplot',
xlab='PC Axis',
ylab='% Variance',
cex.names=1.5,cex.axis=1.5,cex.lab=1.5,cex.main=1.5) > Based on this screeplot - 2 axis are OK, as they show 0.079 and 0.077, respectively, of the variance.
# # Extract PCA and plot
pca_lr_frame<-data.frame(pca_lr$x,SAMPLE=rownames(pca_lr$x))
# # head(pca_lr_frame)
x <- colsplit(pca_lr_frame$SAMPLE, "_", c("DATASET", "LOCATION", "SAMPLEID", "YEAR", "REP"));
pca_lr_frame <- data.frame(x, pca_lr_frame)
# head(pca_lr_frame)
ventnames <- read.delim("data-input/ventnames-gordaridge.txt")
# names(ventnames);head(ventnames)
# # View(ventnames)
colnames(ventnames)[1]<-"SAMPLE"
# #
pca_lr_frame_wNames <- left_join(pca_lr_frame, ventnames, by="SAMPLE")
# head(pca_lr_frame_wNames)
# #
# unique(pca_lr_frame_wNames$LocationName)
sample_order <- c('Mt Edwards Plume','Mt Edwards Vent','Candelabra Vent','SirVentsAlot Vent','Venti Latte Vent','Deep seawater', 'Shallow seawater')
sample_color <-c("lightblue","#377eb8", "#e41a1c", "#4daf4a", "#984ea3", "orange", "yellow")
pca_lr_frame_wNames$SAMPLE_ORDER <- factor(pca_lr_frame_wNames$LocationName, levels = rev(sample_order))
names(sample_color)<-sample_order
# head(pca_lr_frame_wNames)library(ggrepel)
# options(repr.plot.width = 8, repr.plot.height = 8)
# svg("PCA-wlabels-gr-22-07-2020.svg", w=8, h=8)
pca_18s <- ggplot(pca_lr_frame_wNames, aes(x=PC1,y=PC2,fill=LocationName,shape=Sampletype, label=LOCATION_SPECIFIC))+ #Replace label=SAMPLEID.y
geom_point(aes(x=PC1,y=PC2,fill=LocationName), size=5)+
scale_fill_manual(values = sample_color)+
ylab(paste0('PC2 ',round(variance_lr[2]*100,2),'%'))+
xlab(paste0('PC1 ',round(variance_lr[1]*100,2),'%'))+
scale_shape_manual(values = c(23,21))+
ggtitle('CLR PCA Ordination')+
# coord_fixed(ratio=variance_lr[2]/variance_lr[1])+
theme_bw()+
geom_text_repel()+
theme(axis.text = element_text(color="black", size=12))+
geom_hline(yintercept = 0) + geom_vline(xintercept = 0) # dev.off()# Import and process 16S ASV table
ventnames_16 <- read.delim("data-input/ventnames-gordaridge-16S.txt")
countbac <- read.delim("data-input/CountTable-wtax-16s-plus3-2020-06-23.txt")
countbac_df <- countbac %>%
separate(Taxon, sep = ";D_[[:digit:]]__", into = c("Domain", "Phylum", "Class", "Order", "Family", "Genus", "Species"), remove = TRUE, extra = "merge") %>% # Warnings are OK with NAs
mutate_if(is.character, str_replace_all, pattern = "D_0__", replacement = "") %>%
column_to_rownames(var = "Feature.ID") %>%
data.frame
# Warnings are OK because not all levels fill out taxa spaces
# head(countbac_df[1:3,])
# Total ASVs and sequences:
count_summary <- countbac_df %>%
select(starts_with("NA")) %>%
data.frame
## Create new column in ASV table that reports if an ASV is present in any of the background seawater samples
# head(countbac_df[1:2,]); names(countbac_df)
### Select BSW only samples and make character list:
bsw_samples <- ventnames_16 %>%
filter(LocationName == "Deep seawater" | LocationName == "Shallow seawater") %>%
select(SAMPLEID_16S) %>%
data.frame
bsw_samples <- as.character(unique(bsw_samples$SAMPLEID_16S))
# bsw_samples
# Create new df with bsw ASVs labeled
sub_bsw_df <- countbac_df %>%
rownames_to_column(var = "Feature.ID") %>%
select(Feature.ID, all_of(bsw_samples)) %>%
mutate(sum = rowSums(select_if(., is.numeric))) %>%
filter(sum > 0) %>%
add_column(bsw_presence = "present in bsw") %>%
select(Feature.ID, bsw_presence) %>%
data.frame
# head(sub_bsw_df)
# unique(sub_bsw_df$bsw_presence)
# Add to original ASV table
countbac_df_cat <- countbac_df %>%
rownames_to_column(var = "Feature.ID") %>%
left_join(sub_bsw_df) %>%
mutate(bsw_presence = replace_na(bsw_presence, "vent only")) %>%
select(-Confidence) %>%
mutate(ASV_SUM_ALL = rowSums(select_if(., is.numeric))) %>%
mutate(ASV_REL_ABUN = 100*(ASV_SUM_ALL/sum(ASV_SUM_ALL))) %>%
# filter(ASV_REL_ABUN > ##) %>% #Filter by rel abun
data.frame
# head(countbac_df_cat)# Remove samples that were repeated
rm <- c("NA108003STEP", "NA108039STEP", "NA108087STEP")
# Filtering count table data:
df_start <- countbac_df_cat %>%
filter(Domain %in% "Archaea" | Domain %in% "Bacteria") %>% #Select only archaea and bacteria, removing unassigned
select(-all_of(rm)) %>% # Remove samples we are replacing
# Could include other filtering options if necessary
select(Feature.ID, starts_with("NA108")) %>%
data.frame
# Prep data so it is ready for data transformations and more
df_format <- df_start %>%
column_to_rownames(var = "Feature.ID") %>%
as.matrix
# class(df_format)
ventnames_16_mod <- ventnames_16 %>%
mutate(location = case_when(
grepl("Plume", SAMPLE_AMY) ~ "Plume",
grepl("BSW", SAMPLE_AMY) ~ "BSW",
grepl("Vent", LocationName) ~ "Vent"),
NA_NUM = SAMPLEID_16S) %>%
mutate(NA_NUM = str_replace(NA_NUM, "NA108", "")) %>%
mutate(NA_NUM = str_replace(NA_NUM, "NA080", "")) %>%
mutate(NA_NUM = str_replace(NA_NUM, "aSTEP", "")) %>%
mutate(NA_NUM = str_replace(NA_NUM, "bSTEP", "")) %>%
mutate(NA_NUM = str_replace(NA_NUM, "STEP20200226", "")) %>%
mutate(NA_NUM = str_replace(NA_NUM, "STEP", "")) %>%
unite(NEW_SAMPLEID, location, NA_NUM, sep ="") %>%
data.frame
# ventnames_16_mod# NMDS - Jaccard distance calculation
jac_gr_16s <- vegdist(t(df_format))
# Ordination - PCA
pcoa_jac_16s <- pcoa(jac_gr_16s)
# Extract variances from pcoa, from jaccard calculated dist. metric
jac_var_16s <- data.frame(pcoa_jac_16s$values$Relative_eig) %>%
select(PercVar = 'pcoa_jac_16s.values.Relative_eig') %>%
rownames_to_column(var = "PCaxis") %>%
data.frame
# head(jac_var_16s[1:4,]) #Look at first 4 axes
# Screeplot check
ggplot(jac_var_16s, aes(x = as.numeric(PCaxis), y = PercVar)) +
geom_bar(stat = "identity", fill = "grey", color = "black") +
theme_minimal() +
theme(axis.title = element_text(color = "black", face = "bold", size = 10),
axis.text.y = element_text(color = "black", face = "bold"),
axis.text.x = element_blank()) +
labs(x = "PC axis", y = "% Variance", title = "Log-Ratio PCoA Screeplot")## Run 0 stress 0.1330027
## Run 1 stress 0.1325471
## ... New best solution
## ... Procrustes: rmse 0.06952486 max resid 0.2841175
## Run 2 stress 0.1325477
## ... Procrustes: rmse 0.0002315351 max resid 0.0006870596
## ... Similar to previous best
## Run 3 stress 0.1221584
## ... New best solution
## ... Procrustes: rmse 0.1042181 max resid 0.4838947
## Run 4 stress 0.1220955
## ... New best solution
## ... Procrustes: rmse 0.005784477 max resid 0.0202706
## Run 5 stress 0.1135523
## ... New best solution
## ... Procrustes: rmse 0.07859589 max resid 0.3404599
## Run 6 stress 0.1632819
## Run 7 stress 0.133565
## Run 8 stress 0.1219904
## Run 9 stress 0.1135523
## ... New best solution
## ... Procrustes: rmse 1.320668e-05 max resid 3.740674e-05
## ... Similar to previous best
## Run 10 stress 0.1135975
## ... Procrustes: rmse 0.009193881 max resid 0.03432903
## Run 11 stress 0.145132
## Run 12 stress 0.1392012
## Run 13 stress 0.1335651
## Run 14 stress 0.1518077
## Run 15 stress 0.1392441
## Run 16 stress 0.1248836
## Run 17 stress 0.1300336
## Run 18 stress 0.1301932
## Run 19 stress 0.1454087
## Run 20 stress 0.1135975
## ... Procrustes: rmse 0.009204767 max resid 0.0344076
## *** Solution reached
## [1] 0.1135523
# Make dataframe of PCoA variables and import vent names
sample_order <- c('Mt Edwards Plume','Mt Edwards Vent','Candelabra Vent','SirVentsAlot Vent','Venti Latte Vent','Deep seawater', 'Shallow seawater')
jac_nmds_df_16s <- data.frame(jac_nmds_16s$points) %>%
rownames_to_column(var = "SAMPLEID_16S") %>%
left_join(ventnames_16_mod) %>%
filter(LocationName %in% sample_order) %>%
data.frame## Joining, by = "SAMPLEID_16S"
sample_order <- c('Mt Edwards Plume','Mt Edwards Vent','Candelabra Vent','SirVentsAlot Vent','Venti Latte Vent','Deep seawater', 'Shallow seawater')
sample_color <-c("lightblue","#377eb8", "#e41a1c", "#4daf4a", "#984ea3", "orange", "yellow")
jac_nmds_df_16s$SAMPLE_ORDER <- factor(jac_nmds_df_16s$LocationName, levels = rev(sample_order))
names(sample_color)<-sample_order
library(ggrepel)
# head(jac_nmds_df_16s[1:3,])
options(repr.plot.width = 8, repr.plot.height = 6)
# svg("PCA-wlabels-gr-22-07-2020.svg", w=8, h=8)
nmds_jac_16s <- ggplot(jac_nmds_df_16s, aes(x = MDS1, y = MDS2,
fill = LocationName,
label = NEW_SAMPLEID)) + #Replace label=SAMPLEID.y
geom_hline(yintercept = 0) + geom_vline(xintercept = 0) +
geom_point(aes(x = MDS1, y = MDS2, fill = LocationName), size = 5, shape = 21) +
scale_fill_manual(values = sample_color) +
# labs(x = "NMDS 1", y = "NMDS 2", title = "Jaccard Distance NMDS (16S)") +
labs(x = "NMDS 1", y = "NMDS 2", title = "") +
theme_bw()+
geom_text_repel()+
theme(axis.text = element_text(color="black", size=12),
legend.title = element_blank()) +
guides(fill = guide_legend(override.aes=list(shape=21)))
nmds_jac_16s # dev.off()# # Log-Ratio
# library(compositions)
df_log_clr <- data.frame(clr(t(df_format)))
# # Ordination - PCA
# # ?prcomp()
pca_clr <- prcomp(df_log_clr)
# # Check variance
check_variance <- (pca_clr$sdev^2)/sum(pca_clr$sdev^2)
# head(check_variance)
# # Screeplot, how many axes are appropriate?
barplot(check_variance,
main='Log-Ratio PCA Screeplot',
xlab='PC Axis',
ylab='% Variance',
cex.names=1.5,cex.axis=1.5,cex.lab=1.5,cex.main=1.5)sample_order <- c('Mt Edwards Plume','Mt Edwards Vent','Candelabra Vent','SirVentsAlot Vent','Venti Latte Vent','Deep seawater', 'Shallow seawater')
df_pca_clr <- data.frame(pca_clr$x, SAMPLEID_16S = rownames(pca_clr$x))
df_pca_clr_wnames <- df_pca_clr %>%
left_join(ventnames_16_mod) %>%
filter(LocationName %in% sample_order) %>%
data.frame## Joining, by = "SAMPLEID_16S"
sample_order <- c('Mt Edwards Plume','Mt Edwards Vent','Candelabra Vent','SirVentsAlot Vent','Venti Latte Vent','Deep seawater', 'Shallow seawater')
sample_color <- c("lightblue","#377eb8", "#e41a1c", "#4daf4a", "#984ea3", "orange", "yellow")
df_pca_clr_wnames$SAMPLE_ORDER <- factor(df_pca_clr_wnames$LocationName, levels = rev(sample_order))
names(sample_color)<-sample_order
library(ggrepel)
# options(repr.plot.width = 8, repr.plot.height = 6)
# svg("PCoA-16S-wolabels.svg", h = 8, w = 8)
pca_16s <- ggplot(df_pca_clr_wnames, aes(x = PC1, y = PC2, fill = SAMPLE_ORDER, label = NEW_SAMPLEID)) +
geom_point(aes(x = PC1,y = PC2, fill = LocationName), size = 4, shape = 21) +
scale_fill_manual(values = sample_color) +
ylab(paste0('PC2 ',round(check_variance[2]*100,2),'%')) +
xlab(paste0('PC1 ',round(check_variance[1]*100,2),'%')) +
ggtitle('16S - CLR PCA Ordination') +
geom_text_repel() +
theme_bw() +
theme(axis.text = element_text(color = "black", size = 12)) +
geom_hline(yintercept = 0) + geom_vline(xintercept = 0)
# pca_16s# options(repr.plot.width = 15, repr.plot.height = 6)
nmdsleg <- get_legend(nmds_jac_18s)
# svg("panel-NMDS.svg", w = 15, h = 6)
plot_grid(nmds_jac_18s + theme(legend.position = "none"),
nmds_jac_16s + theme(legend.position = "none"),
nmdsleg, nrow = 1, rel_widths = c(1,1,0.5),
labels = c("a. 18S", "b. 16S", ""))## Loading objects:
## gr_counts_filter
## gr_counts_wtax
## gr_counts_avg_wtax
## Sampletype LocationName
## 1 in situ Shallow seawater
## 3 in situ Venti Latte Vent
## 4 in situ Candelabra Vent
## 5 Grazing SirVentsAlot Vent
## 6 in situ Deep seawater
## 8 in situ Mt Edwards Plume
## 12 in situ SirVentsAlot Vent
## 13 Grazing Mt Edwards Plume
## 17 in situ Mt Edwards Vent
## 36 Grazing Mt Edwards Vent
## 40 Grazing Candelabra Vent
## 49 Grazing Venti Latte Vent
## 135 Control Lab blank
bin_category <- gr_counts_avg_wtax %>%
filter(!(Sampletype == "Control")) %>%
select(Feature.ID, Sampletype, LocationName, COUNT_AVG) %>%
data.frame
bin_category$sample_tmp <- paste(bin_category$Sampletype, bin_category$LocationName, sep = "_")
bin_category$sample_tmp <- gsub(" ","_", bin_category$sample_tmp)
# head(bin_category)
bin_category$Sampletype<-NULL; bin_category$LocationName <- NULL
# head(bin_category)
#
library(reshape2)
bin_wide <- dcast(bin_category, Feature.ID ~ sample_tmp) #aggregate to length is OK - counts occurencesshared_all <- bin_wide %>%
filter((in_situ_Shallow_seawater > 0 &
in_situ_Deep_seawater > 0 &
in_situ_Mt_Edwards_Plume > 0 &
Grazing_Mt_Edwards_Plume > 0 &
in_situ_Mt_Edwards_Vent > 0 &
Grazing_Mt_Edwards_Vent > 0 &
in_situ_Venti_Latte_Vent > 0 &
Grazing_Venti_Latte_Vent > 0 &
in_situ_Candelabra_Vent > 0 &
Grazing_Candelabra_Vent > 0 &
in_situ_SirVentsAlot_Vent > 0 &
Grazing_SirVentsAlot_Vent > 0)) %>%
select(Feature.ID) %>%
distinct() %>%
add_column(CATEGORY = "ASV present in all samples") %>%
data.frame
# dim(shared_all)
asv_key <- shared_all
#
bsw_only <- bin_wide %>%
filter((in_situ_Shallow_seawater > 0 &
in_situ_Deep_seawater > 0 &
in_situ_Mt_Edwards_Plume == 0 &
Grazing_Mt_Edwards_Plume == 0 &
in_situ_Mt_Edwards_Vent == 0 &
Grazing_Mt_Edwards_Vent == 0 &
in_situ_Venti_Latte_Vent == 0 &
Grazing_Venti_Latte_Vent == 0 &
in_situ_Candelabra_Vent == 0 &
Grazing_Candelabra_Vent == 0 &
in_situ_SirVentsAlot_Vent == 0 &
Grazing_SirVentsAlot_Vent == 0)) %>%
select(Feature.ID) %>%
distinct() %>%
add_column(CATEGORY = "BSW only") %>%
data.frame
# head(bsw_only); dim(bsw_only)
#
asv_key <- rbind(asv_key, bsw_only)
# dim(asv_key)
#
shared_edwards <- bin_wide %>%
filter((in_situ_Shallow_seawater == 0 &
in_situ_Deep_seawater == 0 &
in_situ_Mt_Edwards_Plume == 0 &
Grazing_Mt_Edwards_Plume == 0 &
in_situ_Mt_Edwards_Vent > 0 &
Grazing_Mt_Edwards_Vent > 0 &
in_situ_Venti_Latte_Vent == 0 &
Grazing_Venti_Latte_Vent == 0 &
in_situ_Candelabra_Vent == 0 &
Grazing_Candelabra_Vent == 0 &
in_situ_SirVentsAlot_Vent == 0 &
Grazing_SirVentsAlot_Vent == 0)) %>%
select(Feature.ID) %>%
distinct() %>%
add_column(CATEGORY = "Shared between in situ vent and grazing exp") %>%
data.frame
# head(shared_edwards); dim(shared_edwards)
#
shared_edwards_wplume <- bin_wide %>%
filter((in_situ_Shallow_seawater == 0 &
in_situ_Deep_seawater == 0 &
in_situ_Mt_Edwards_Plume > 0 &
Grazing_Mt_Edwards_Plume > 0 &
in_situ_Mt_Edwards_Vent == 0 &
Grazing_Mt_Edwards_Vent == 0 &
in_situ_Venti_Latte_Vent == 0 &
Grazing_Venti_Latte_Vent == 0 &
in_situ_Candelabra_Vent == 0 &
Grazing_Candelabra_Vent == 0 &
in_situ_SirVentsAlot_Vent == 0 &
Grazing_SirVentsAlot_Vent == 0)) %>%
select(Feature.ID) %>%
distinct() %>%
add_column(CATEGORY = "Shared between in situ vent and grazing exp") %>%
data.frame
# head(shared_edwards_wplume); dim(shared_edwards_wplume)
#
shared_ventilatte <- bin_wide %>%
filter((in_situ_Shallow_seawater == 0 &
in_situ_Deep_seawater == 0 &
in_situ_Mt_Edwards_Plume == 0 &
Grazing_Mt_Edwards_Plume == 0 &
in_situ_Mt_Edwards_Vent == 0 &
Grazing_Mt_Edwards_Vent == 0 &
in_situ_Venti_Latte_Vent > 0 &
Grazing_Venti_Latte_Vent > 0 &
in_situ_Candelabra_Vent == 0 &
Grazing_Candelabra_Vent == 0 &
in_situ_SirVentsAlot_Vent == 0 &
Grazing_SirVentsAlot_Vent == 0)) %>%
select(Feature.ID) %>%
distinct() %>%
add_column(CATEGORY = "Shared between in situ vent and grazing exp") %>%
data.frame
# dim(shared_ventilatte)
#
shared_candelabra <- bin_wide %>%
filter((in_situ_Shallow_seawater == 0 &
in_situ_Deep_seawater == 0 &
in_situ_Mt_Edwards_Plume == 0 &
Grazing_Mt_Edwards_Plume == 0 &
in_situ_Mt_Edwards_Vent == 0 &
Grazing_Mt_Edwards_Vent == 0 &
in_situ_Venti_Latte_Vent == 0 &
Grazing_Venti_Latte_Vent == 0 &
in_situ_Candelabra_Vent > 0 &
Grazing_Candelabra_Vent > 0 &
in_situ_SirVentsAlot_Vent == 0 &
Grazing_SirVentsAlot_Vent == 0)) %>%
select(Feature.ID) %>%
distinct() %>%
add_column(CATEGORY = "Shared between in situ vent and grazing exp") %>%
data.frame
# dim(shared_candelabra)
#
shared_Sirventsalot <- bin_wide %>%
filter((in_situ_Shallow_seawater == 0 &
in_situ_Deep_seawater == 0 &
in_situ_Mt_Edwards_Plume == 0 &
Grazing_Mt_Edwards_Plume == 0 &
in_situ_Mt_Edwards_Vent == 0 &
Grazing_Mt_Edwards_Vent == 0 &
in_situ_Venti_Latte_Vent == 0 &
Grazing_Venti_Latte_Vent == 0 &
in_situ_Candelabra_Vent == 0 &
Grazing_Candelabra_Vent == 0 &
in_situ_SirVentsAlot_Vent > 0 &
Grazing_SirVentsAlot_Vent > 0)) %>%
select(Feature.ID) %>%
distinct() %>%
add_column(CATEGORY = "Shared between in situ vent and grazing exp") %>%
data.frametmp_bin <- bin_wide; tmp_bin$SUM <- rowSums(tmp_bin[, 2:13])
tmp_bin2 <- subset(tmp_bin, SUM == 1)
tmp_bin2$CATEGORY <- "Unique to sample"
# head(tmp_bin2)
binned_unique <- tmp_bin2[, c("Feature.ID", "CATEGORY")]
length(unique(binned_unique$Feature.ID)); dim(binned_unique)## [1] 6807
## [1] 6807 2
#
asv_key_2 <- rbind(asv_key, shared_edwards, shared_edwards_wplume, shared_ventilatte, shared_candelabra, shared_Sirventsalot, binned_unique)
dim(asv_key_2); length(unique(asv_key_2$Feature.ID))## [1] 7004 2
## [1] 7004
appears_vent_plume <- bin_wide %>%
filter((in_situ_Shallow_seawater == 0 &
in_situ_Deep_seawater == 0)) %>%
select(Feature.ID) %>%
distinct() %>%
add_column(CATEGORY_appear = "Appears in situ vent or plume (not BSW)") %>%
data.frame
dim(appears_vent_plume)## [1] 7360 2
#
appears_vent <- bin_wide %>%
filter((in_situ_Shallow_seawater == 0 &
in_situ_Deep_seawater == 0 &
in_situ_Mt_Edwards_Plume == 0 &
Grazing_Mt_Edwards_Plume == 0)) %>%
select(Feature.ID) %>%
distinct() %>%
add_column(CATEGORY_appear = "Appears in situ vent (not BSW or plume)") %>%
data.frame
dim(appears_vent)## [1] 4832 2
#
appears_deep <- bin_wide %>%
filter(((in_situ_Shallow_seawater == 0 &
in_situ_Deep_seawater > 0) &
in_situ_Mt_Edwards_Plume > 0 |
# Grazing_Mt_Edwards_Plume == 0 &
in_situ_Mt_Edwards_Vent > 0 |
# Grazing_Mt_Edwards_Vent == 0 &
in_situ_Venti_Latte_Vent > 0 |
# Grazing_Venti_Latte_Vent == 0 &
in_situ_Candelabra_Vent > 0 |
# Grazing_Candelabra_Vent == 0 &
in_situ_SirVentsAlot_Vent > 0 #&
# Grazing_SirVentsAlot_Vent == 0
)) %>%
select(Feature.ID) %>%
distinct() %>%
add_column(CATEGORY_appear = "Appears in deep BSW and among vent/plume sites") %>%
data.frame
dim(appears_deep)## [1] 4969 2
# Regardless of grazing experiment presence:
# Add in this order, will overwrite the previous entry.
dim(appears_deep) # ASV appears BSW and vent and plume## [1] 4969 2
## [1] 7360 2
## [1] 4832 2
## [1] 7004 2
## [1] "ASV present in all samples"
## [2] "BSW only"
## [3] "Shared between in situ vent and grazing exp"
## [4] "Unique to sample"
## Feature.ID Grazing_Candelabra_Vent
## 1 0009645516609bda2246e1955ff9ec1d 0
## 2 0030ad8ce44f257c42daf3673bf92197 0
## 3 0038478be7fb4f097ce93a5e9341af2a 0
## 4 003b5938e31a8c1b1809e0358da894e0 0
## 5 003ff3e98dff52952a7036585a32c2f2 0
## 6 004ebe8047915b78deefc412bef467b7 0
## Grazing_Mt_Edwards_Plume Grazing_Mt_Edwards_Vent Grazing_SirVentsAlot_Vent
## 1 0 0 0
## 2 0 0 1
## 3 3 0 0
## 4 0 0 0
## 5 1 0 0
## 6 0 0 0
## Grazing_Venti_Latte_Vent in_situ_Candelabra_Vent in_situ_Deep_seawater
## 1 0 0 0
## 2 0 1 0
## 3 0 0 1
## 4 0 0 0
## 5 0 0 0
## 6 0 1 0
## in_situ_Mt_Edwards_Plume in_situ_Mt_Edwards_Vent in_situ_Shallow_seawater
## 1 0 0 1
## 2 0 0 1
## 3 3 0 1
## 4 0 0 1
## 5 0 1 0
## 6 0 0 0
## in_situ_SirVentsAlot_Vent in_situ_Venti_Latte_Vent
## 1 0 0
## 2 0 1
## 3 1 1
## 4 0 0
## 5 0 0
## 6 0 0
## [1] 9028
## [1] 9028 13
#
# New data frame
asv_key_all <- data.frame(bin_wide[,c("Feature.ID")])
colnames(asv_key_all)[1] <- "Feature.ID"
head(asv_key_all)## Feature.ID
## 1 0009645516609bda2246e1955ff9ec1d
## 2 0030ad8ce44f257c42daf3673bf92197
## 3 0038478be7fb4f097ce93a5e9341af2a
## 4 003b5938e31a8c1b1809e0358da894e0
## 5 003ff3e98dff52952a7036585a32c2f2
## 6 004ebe8047915b78deefc412bef467b7
deep <- as.character(unique(appears_deep$Feature.ID))
asv_key_all$SORTED[asv_key_all$Feature.ID %in% deep] = "ASV appears in BSW and throughout vent/plume"
vent_plume <- as.character(unique(appears_vent_plume$Feature.ID))
asv_key_all$SORTED[asv_key_all$Feature.ID %in% vent_plume] = "ASV appears among vent/plume (no BSW)"
vent <- as.character(unique(appears_vent$Feature.ID))
asv_key_all$SORTED[asv_key_all$Feature.ID %in% vent] = "ASV appears among vent (no BSW or plume)"
table(asv_key_all$SORTED)##
## ASV appears among vent (no BSW or plume)
## 4832
## ASV appears among vent/plume (no BSW)
## 2528
## ASV appears in BSW and throughout vent/plume
## 457
#
asv_key_joined <- left_join(asv_key_all, asv_key_2)
# head(asv_key_joined)
asv_key_joined_filled <- data.frame(asv_key_joined, (coalesce(asv_key_joined$CATEGORY, asv_key_joined$SORTED)))
colnames(asv_key_joined_filled)[4] <- "category_final"
table(asv_key_joined_filled$category_final)##
## ASV appears among vent (no BSW or plume)
## 606
## ASV appears among vent/plume (no BSW)
## 894
## ASV appears in BSW and throughout vent/plume
## 446
## ASV present in all samples
## 11
## BSW only
## 7
## Shared between in situ vent and grazing exp
## 179
## Unique to sample
## 6807
Print report on total ASV counts that fall into each category.
gr_sorted <- left_join(gr_counts_avg_wtax, asv_key_final) %>%
filter(!(Sampletype == "Control"))
table(gr_sorted$category_final)##
## ASV appears among vent (no BSW or plume)
## 1926
## ASV appears among vent/plume (no BSW)
## 5184
## ASV appears in BSW and throughout vent/plume
## 4358
## ASV present in all samples
## 268
## BSW only
## 14
## Other
## 205
## Shared between in situ vent and grazing exp
## 546
## Unique to sample
## 6807
## [1] 1260878
gr_sorted_summary <- gr_sorted %>%
group_by(category_final) %>%
summarise(totalasv = n_distinct(Feature.ID), totalseq = sum(COUNT_AVG)) %>%
mutate(Perc_seq = 100*(totalseq/total)) %>%
data.frame
#
# gr_sorted_summary
# sum(gr_sorted_summary$totalasv)
gr_sorted_summary_plots <- gr_sorted %>%
group_by(category_final, LocationName, SAMPLEID) %>%
summarise(totalasv = n_distinct(Feature.ID), totalseq = sum(COUNT_AVG)) %>%
mutate(Perc_seq = 100*(totalseq/total)) %>%
data.framegr_stats_wtax <- gr_sorted %>%
group_by(Taxa, category_final) %>%
summarise(totalasv = n(), totalseq = sum(COUNT_AVG)) %>%
ungroup() %>%
group_by(Taxa, category_final) %>%
summarise(totalasvs = sum(totalasv),
sumseqs = sum(totalseq)) %>%
mutate(percentseq = sumseqs/sum(sumseqs)*100) %>%
pivot_wider(names_from = Taxa, names_glue = "{Taxa}_{.value}", values_from = c(totalasvs, sumseqs, percentseq)) %>%
data.frame
# head(gr_stats_wtax)
# head(gr_sorted)
# write_delim(gr_stats_wtax, path = "Distribution-ASVs-bytax.txt", delim = "\t")
# write_delim(gr_sorted_summary, path = "Distribution-ASVs.txt", delim = "\t")Generate bar plot that summarized sequence and ASV abundance by distribution of ASV.
LocationNameOrder<-c("Shallow seawater", "Deep seawater", "Mt Edwards Plume", "Mt Edwards Vent", "Venti Latte Vent", "Candelabra Vent", "SirVentsAlot Vent")
gr_sorted_summary_plots$LOCATION_ORDER <- factor(gr_sorted_summary_plots$LocationName, levels = LocationNameOrder)
library(RColorBrewer)
category_order <- c("ASV present in all samples", "BSW only", "ASV appears in BSW and throughout vent/plume",
"ASV appears among vent/plume (no BSW)", "ASV appears among vent (no BSW or plume)",
"Shared between in situ vent and grazing exp", "Unique to sample", "Other")
category_color <- c("#003c30","#01665e", "#80cdc1",
"#c51b7d", "#de77ae", "#f1b6da",
"#fdb863", "#b35806")
gr_sorted_summary_plots$CATEGORY_ORDER <- factor(gr_sorted_summary_plots$category_final, levels = category_order)
names(category_color) <- category_ordertotalseq <- ggplot(gr_sorted_summary_plots, aes(x=SAMPLEID, y=totalseq, fill=CATEGORY_ORDER)) +
geom_bar(stat = "identity", color = "black", position = "fill") +
# scale_fill_brewer(palette = "Accent") +
scale_fill_manual(values = category_color) +
scale_y_continuous(expand = c(0, 0)) +
facet_grid(. ~ LOCATION_ORDER, space = "free", scales = "free")+
theme(legend.position = "right",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x = element_text(angle=90, hjust=1, vjust=0.5, color = "black", face="bold"),
axis.text.y = element_text(color="black", face="bold"),
strip.text = element_text(size = 6, angle = 90, vjust = 0, hjust = 1),
strip.background = element_blank(),
legend.title = element_blank()) +
labs(x="", y="")totalasv <- ggplot(gr_sorted_summary_plots, aes(x=SAMPLEID, y= totalasv, fill=CATEGORY_ORDER)) +
geom_bar(stat = "identity", color = "black", position = "fill") +
# scale_fill_brewer(palette = "Accent") +
scale_fill_manual(values = category_color) +
scale_y_continuous(expand = c(0, 0)) +
facet_grid(. ~ LOCATION_ORDER, space = "free", scales = "free") +
theme(legend.position = "right",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x = element_text(angle=90, hjust=1, vjust=0.5, color = "black", face="bold"),
axis.text.y = element_text(color="black", face="bold"),
strip.text = element_text(size = 6, angle = 90, vjust = 0, hjust = 1),
strip.background = element_blank(),
legend.title = element_blank()) +
labs(x="", y="")# Supplementary figure
# options(repr.plot.width = 15, repr.plot.height = 10)
plot_grid(totalseq, totalasv,
totalseq + geom_bar(stat = "identity", color = "black", position = "stack"),
totalasv + geom_bar(stat = "identity", color = "black", position = "stack"))Sort categories into either “Resident” or “Cosmopolitan”
resident <- c("ASV appears among vent/plume (no BSW)", "ASV appears among vent (no BSW or plume)", "Shared between in situ vent and grazing exp")
cosmo <- c("ASV appears in BSW and throughout vent/plume", "ASV present in all samples")
gr_sorted$RES_COSMO = "Other"
gr_sorted$RES_COSMO[gr_sorted$category_final %in% resident] = "Resident"
gr_sorted$RES_COSMO[gr_sorted$category_final %in% cosmo] = "Cosmopolitan"# head(gr_sorted)
tmp <- gr_sorted %>%
select(Feature.ID, RES_COSMO) %>%
distinct()
table(tmp$RES_COSMO) # ASV distribution##
## Cosmopolitan Other Resident
## 457 6892 1679
## [1] 1260878
tmp2 <- gr_sorted %>%
select(RES_COSMO, COUNT_AVG) %>%
group_by(RES_COSMO) %>%
summarise(SUM = sum(COUNT_AVG)) %>%
mutate(Perc = 100*(SUM/total))## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 3 x 3
## RES_COSMO SUM Perc
## <chr> <dbl> <dbl>
## 1 Cosmopolitan 593854. 47.1
## 2 Other 123331 9.78
## 3 Resident 543694. 43.1
# save(gr_sorted, asv_key_final, file = "data-input/GR-countinfo-withASVdistribution.RData")
# head(gr_sorted)gr_sorted_bydist <- gr_sorted %>%
unite("SAMPLE", c("LocationName", "Sampletype", "SAMPLEID"), sep = " ", remove = FALSE) %>%
group_by(SAMPLE, Sampletype, LocationName, SAMPLEID, RES_COSMO) %>%
summarise(totalasv = n(), totalseq = sum(COUNT_AVG)) %>%
data.frame
# head(gr_sorted_bydist)sample_order_all<-c("Shallow seawater in situ sterivex","Deep seawater in situ sterivex","Mt Edwards Plume in situ sterivex","Mt Edwards Plume Grazing T0","Mt Edwards Plume Grazing T24","Mt Edwards Plume Grazing T36","Mt Edwards Vent in situ SUPR","Mt Edwards Vent Grazing T0","Mt Edwards Vent Grazing T36","Venti Latte Vent in situ SUPR","Venti Latte Vent Grazing T0","Venti Latte Vent Grazing T36","Candelabra Vent in situ SUPR","Candelabra Vent Grazing T24","SirVentsAlot Vent in situ SUPR","SirVentsAlot Vent Grazing T24")
sample_name_all<-c("Shallow seawater","Deep seawater","Mt Edwards Plume","Mt Edwards Plume T0","Mt Edwards Plume T23","Mt Edwards Plume T35","Mt Edwards Vent","Mt Edwards Vent T0","Mt Edwards Vent T36","Venti Latte Vent","Venti Latte Vent T0","Venti Latte Vent T29","Candelabra Vent","Candelabra Vent T26","SirVentsAlot Vent","SirVentsAlot Vent T24")
gr_sorted_bydist$SAMPLE_ORDER <- factor(gr_sorted_bydist$SAMPLE, levels = sample_order_all, labels = sample_name_all)
#
LocationNameOrder<-c("Shallow seawater", "Deep seawater", "Mt Edwards Plume", "Mt Edwards Vent", "Venti Latte Vent", "Candelabra Vent", "SirVentsAlot Vent")
gr_sorted_bydist$LOCATION_ORDER <- factor(gr_sorted_bydist$LocationName, levels = LocationNameOrder)
unique(gr_sorted_bydist$Sampletype)## [1] Grazing in situ
## Levels: control Control Grazing in situ
category_order_simple <- c("Cosmopolitan", "Resident", "Other")
category_color_simple <- c("#7fcdbb", "#f768a1", "#fdae61")
gr_sorted_bydist$CATEGORY_ORDER <- factor(gr_sorted_bydist$RES_COSMO, levels = category_order_simple)
names(category_color_simple) <- category_order_simplefilled_seq <- ggplot(gr_sorted_bydist, aes(x = SAMPLE_ORDER, y = totalseq)) +
geom_bar(stat = "identity", position = "stack", color = "black",
alpha = 0.6, aes(fill = CATEGORY_ORDER)) +
scale_fill_manual(values = category_color_simple) +
facet_grid(. ~ LOCATION_ORDER, space = "free", scales = "free") +
scale_y_continuous(expand = c(0,0)) +
theme(legend.position = "right",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(),
# panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x = element_text(angle=90, hjust=1, vjust=0.5, color = "black", face="bold"),
axis.text.y = element_text(color="black", face="bold"),
strip.text = element_text(size = 5, face = "bold"),
strip.background = element_blank(),
legend.title = element_blank()) +
labs(x="", y="Total sequences")
#filled_asv <- ggplot(gr_sorted_bydist, aes(x = SAMPLE_ORDER, y= totalasv)) +
geom_bar(stat = "identity", position = "stack", color = "black",
alpha = 0.6, aes(fill = CATEGORY_ORDER)) +
scale_fill_manual(values = category_color_simple) +
facet_grid(. ~ LOCATION_ORDER, space = "free", scales = "free") +
scale_y_continuous(expand = c(0,0)) +
theme(legend.position = "right",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(),
# panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x = element_text(angle=90, hjust=1, vjust=0.5, color = "black", face="bold"),
axis.text.y = element_text(color="black", face="bold"),
strip.text = element_text(size = 5, face = "bold"),
strip.background = element_blank(),
legend.title = element_blank()) +
labs(x="", y="Total ASVs")legend<-get_legend(filled_seq)
# svg("dist-seq-asv.svg", h = 5, w = 15)
options(repr.plot.width = 15, repr.plot.height = 5)
plot_grid(filled_seq + theme(legend.position = "none"),
filled_asv + theme(legend.position = "none"),
legend, ncol = 3, nrow = 1, rel_widths = c(1,1,0.5), labels = c("a", "b", ""))## Loading objects:
## gr_counts_filter
## gr_counts_wtax
## gr_counts_avg_wtax
## Loading objects:
## gr_sorted
## asv_key_final
# Add Taxa2 level
expand_taxa2 <- function(df){
sumseq <- sum(df$COUNT_AVG)
# Sum asv totals
df_tmp <- df %>% group_by(Feature.ID) %>%
summarise(SUM_ASV = sum(COUNT_AVG)) %>%
mutate(RelAbun = 100*(SUM_ASV/sumseq)) %>%
filter(RelAbun > 0.01) %>%
data.frame
topASVs18s <- as.character(unique(df_tmp$Feature.ID)) #Select ASVs > 0.01% of data
df$Taxa <- as.character(df$Taxa); df$Order <- as.character(df$Order)
df$Class <- as.character(df$Class); df$Division <- as.character(df$Division)
non_ciliate <- c("Alveolata-Syndiniales", "Alveolata-Dinoflagellates", "Alveolata-Other")
order <- c("Alveolata-Syndiniales", "Alveolata-Dinoflagellates")
class <- c("Alveolata-Ciliates", "Stramenopiles-Ochrophyta", "Stramenopiles-MAST", "Opisthokonta-Metazoa", "Opisthokonta-Fungi", "Opisthokonta-Other")
df2 <- df %>%
mutate(Taxa2 = Taxa) %>% # Duplicate Taxa column
mutate(Taxa2 = ifelse(Taxa %in% order, Order, Taxa2),
Taxa2 = ifelse(Taxa %in% class, Class, Taxa2),
Taxa2 = ifelse(Taxa %in% "Amoebozoa", Division, Taxa2),
# Curate Ciliates
Taxa2 = ifelse(Class == "Spirotrichea", paste(Class, Order, sep = "-"), Taxa2),
Taxa2 = ifelse(grepl("Spirotrichea_", Taxa2), "Spirotrichea-Other", Taxa2),
Taxa2 = ifelse(grepl("Spirotrichea-NA", Taxa2), "Spirotrichea-Other", Taxa2),
Taxa2 = ifelse(grepl("Spirotrichea-Strombidiida_", Taxa2), "CONThreeP", Taxa2),
Taxa2 = ifelse(grepl("CONTH_", Taxa2), "Spirotrichea-Strombidiida", Taxa2),
# Curate Radiolaria
Taxa2 = ifelse(Class == "Acantharea", Order, Taxa2),
Taxa2 = ifelse(Class == "Polycystinea", Order, Taxa2),
Taxa2 = ifelse(Taxa2 == "Rhizaria-Radiolaria-Other", "Rhizaria-Radiolaria", Taxa2),
Taxa2 = ifelse(Taxa2 == "Rhizaria-Cercozoa-Other", "Rhizaria-Cercozoa", Taxa2),
# Add hacrobia resolution
Taxa2 = ifelse(Taxa2 == "Hacrobia-Other", Division, Taxa2),
# Add Excavata resolution
Taxa2 = ifelse(Taxa2 == "Excavata", Division, Taxa2),
# Curate Stramenopiles
# Taxa2 = ifelse(Taxa == "Stramenopiles-MAST", Taxa, Taxa2),
Taxa2 = ifelse(grepl("MOCH-", Taxa2), "MOCH", Taxa2),
# Curate other unknown - Move low abundance ASVs to "Other"
Taxa2 = ifelse(grepl("_X", Taxa2), Taxa, Taxa2),
Taxa2 = ifelse(is.na(Taxa2), Taxa, Taxa2),
# Taxa2 = ifelse(!(Feature.ID %in% topASVs18s), paste(Taxa, "Other", sep = "-"), Taxa2),
Taxa2 = ifelse(Taxa2 == "Unassigned-Eukaryote-Other", "Unassigned-Eukaryote", Taxa2),
Taxa2 = ifelse(grepl("-Other-Other", Taxa2), Taxa, Taxa2)) %>%
mutate(Broad_Taxa = Taxa) %>%
mutate(Broad_Taxa = ifelse(Broad_Taxa %in% non_ciliate, "Alveolata", Broad_Taxa),
Broad_Taxa = ifelse(grepl("Rhizaria", Broad_Taxa), "Rhizaria", Broad_Taxa),
Broad_Taxa = ifelse(grepl("Stramenopiles", Broad_Taxa), "Stramenopiles", Broad_Taxa),
Broad_Taxa = ifelse(grepl("Archaeplastida", Broad_Taxa), "Archaeplastida", Broad_Taxa),
Broad_Taxa = ifelse(grepl("Hacrobia", Broad_Taxa), "Hacrobia", Broad_Taxa),
Broad_Taxa = ifelse(grepl("Opisthokonta", Broad_Taxa), "Opisthokonta", Broad_Taxa)) %>%
data.frame
return(df2)
}## `summarise()` ungrouping output (override with `.groups` argument)
## [1] "Rhizaria" "Stramenopiles" "Opisthokonta"
## [4] "Alveolata-Ciliates" "Alveolata" "Unassigned-Eukaryote"
## [7] "Hacrobia" "Archaeplastida" "Excavata"
## [10] "Amoebozoa"
# Add categories & set up for plotting function
gr_tax_res <- gr_counts_avg_wtax2 %>%
left_join(asv_key_final) %>%
unite(SAMPLE, LocationName, Sampletype, SAMPLEID, sep = "-", remove = FALSE) %>%
data.frame## Joining, by = "Feature.ID"
#
resident <- c("ASV appears among vent/plume (no BSW)", "ASV appears among vent (no BSW or plume)", "Shared between in situ vent and grazing exp")
cosmo <- c("ASV appears in BSW and throughout vent/plume", "ASV present in all samples")
gr_tax_res$RES_COSMO = "Other"
gr_tax_res$RES_COSMO[gr_tax_res$category_final %in% resident] = "Resident"
gr_tax_res$RES_COSMO[gr_tax_res$category_final %in% cosmo] = "Cosmopolitan"sample_order_all<-c("Shallow seawater-in situ-sterivex","Deep seawater-in situ-sterivex","Mt Edwards Plume-in situ-sterivex","Mt Edwards Plume-Grazing-T0","Mt Edwards Plume-Grazing-T24","Mt Edwards Plume-Grazing-T36","Mt Edwards Vent-in situ-SUPR","Mt Edwards Vent-Grazing-T0","Mt Edwards Vent-Grazing-T36","Venti Latte Vent-in situ-SUPR","Venti Latte Vent-Grazing-T0","Venti Latte Vent-Grazing-T36","Candelabra Vent-in situ-SUPR","Candelabra Vent-Grazing-T24","SirVentsAlot Vent-in situ-SUPR","SirVentsAlot Vent-Grazing-T24")
sample_name_all<-c("Shallow seawater","Deep seawater","Mt Edwards Plume","Mt Edwards Plume T0","Mt Edwards Plume T23","Mt Edwards Plume T35","Mt Edwards Vent","Mt Edwards Vent T0","Mt Edwards Vent T36","Venti Latte Vent","Venti Latte Vent T0","Venti Latte Vent T29","Candelabra Vent","Candelabra Vent T26","SirVentsAlot Vent","SirVentsAlot Vent T24")
prepdf_tax_dist <- function(df){
df2 <- df %>%
# filter(category_final %in% category) %>%
# average across replicates
group_by(Feature.ID, RES_COSMO, SAMPLE, SAMPLEID, Sampletype, LocationName, Broad_Taxa, Taxon_updated, Taxa, Taxa2) %>%
summarise(COUNT_AVG2 = mean(COUNT_AVG)) %>%
ungroup() %>%
# sum by like taxa
group_by(RES_COSMO, SAMPLE, SAMPLEID, Sampletype, LocationName, Broad_Taxa, Taxa, Taxa2) %>%
summarise(SUM = sum(COUNT_AVG2)) %>%
data.frame
df2$SAMPLENAME<-factor(df2$SAMPLE, levels = sample_order_all, labels = sample_name_all)
df2$SAMPLEID_ORDER <- factor(df2$SAMPLEID, levels = c("sterivex", "SUPR", "T0", "T24", "T36"))
df2$LOCATION_ORDER <- factor(df2$LocationName, levels=c("Shallow seawater", "Deep seawater", "Mt Edwards Plume", "Mt Edwards Vent", "Venti Latte Vent", "Candelabra Vent", "SirVentsAlot Vent"))
return(df2)
}gr_tax_res_toplot <- prepdf_tax_dist(gr_tax_res)
# head(gr_tax_res_toplot[1:2,])
# View(gr_tax_res_toplot %>% select(Taxa, Taxa2) %>% distinct())gr_tax_res_richness <- gr_tax_res %>%
# Average across replicates
group_by(Feature.ID, RES_COSMO, SAMPLE, SAMPLEID, Sampletype, LocationName, Broad_Taxa, Taxon_updated, Taxa, Taxa2) %>%
summarise(COUNT_AVG2 = mean(COUNT_AVG)) %>%
ungroup() %>%
# Get richness for each taxonomic group
group_by(RES_COSMO, Broad_Taxa, Taxa, Taxa2) %>%
summarise(RICHNESS = n_distinct(Feature.ID)) %>%
data.frame## `summarise()` regrouping output by 'Feature.ID', 'RES_COSMO', 'SAMPLE', 'SAMPLEID', 'Sampletype', 'LocationName', 'Broad_Taxa', 'Taxon_updated', 'Taxa' (override with `.groups` argument)
## `summarise()` regrouping output by 'RES_COSMO', 'Broad_Taxa', 'Taxa' (override with `.groups` argument)
# Factor
ciliate_order <- c("Heterotrichea","Oligohymenophorea","Litostomatea","Karyorelictea","Nassophorea","Phyllopharyngea","Plagiopylea","CONThreeP","Spirotrichea-Choreotrichida","Spirotrichea-Euplotia","Spirotrichea-Hypotrichia","Spirotrichea-Strombidiida","Spirotrichea-Tintinnida","Spirotrichea-Other","Alveolata-Ciliates")
gr_tax_res_toplot$CILIATE_ORDER <- factor(gr_tax_res_toplot$Taxa2, levels = ciliate_order)
CILIATE_COLOR <- c("#ffffcc","#d9f0a3","#addd8e","#78c679","#31a354","#006837","#fde0dd","#fcc5c0","#fa9fb5","#f768a1","#dd3497","#ae017e","#7a0177","#49006a","#bdbdbd")
names(CILIATE_COLOR)<-ciliate_order
# head(gr_tax_res_toplot)
ciliate_plot <- gr_tax_res_toplot %>%
filter(Taxa %in% "Alveolata-Ciliates") %>%
filter(!(RES_COSMO == "Other")) %>%
ggplot(aes(x = SAMPLENAME, y = SUM, fill = CILIATE_ORDER)) +
geom_bar(stat = "identity", position = "fill", color = "black") +
scale_fill_manual(values = CILIATE_COLOR) +
scale_y_continuous(expand = c(0,0))+
theme(legend.position = "right",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x = element_text(angle=90, hjust=1, vjust=0.5, color = "black", face="bold", size = 12),
axis.text.y = element_text(color="black", face="bold", size = 12),
strip.text.x = element_blank(),
strip.background = element_blank(),
legend.title = element_blank()) +
labs(x="", y="Relative abundance")+
facet_grid(RES_COSMO~LOCATION_ORDER, space = "free", scales = "free")Repeat high-resolution look at protistan biogeography, but without ciliate groups.
# Heterotrophs - non-ciliate
tax_order <- c("Prorocentrales","Gymnodiniales","Peridiniales","Gonyaulacales","Torodiniales","Suessiales","Noctilucales","Alveolata-Dinoflagellates","Dino-Group-I","Dino-Group-II","Dino-Group-IV","Dino-Group-III","Dino-Group-V","Alveolata-Syndiniales","Alveolata-Other","Rhizaria-Cercozoa","Rhizaria-Radiolaria-Acantharea","Rhizaria-Radiolaria-Polycystinea","Rhizaria-Radiolaria","Rhizaria-Other","Stramenopiles-MAST","Bacillariophyta","Dictyochophyceae","Chrysophyceae","MOCH","Bolidophyceae","Pelagophyceae","Synurophyceae","Stramenopiles-Ochrophyta","Stramenopiles-Other","Hacrobia-Cryptophyta","Hacrobia-Haptophyta","Centroheliozoa","Telonemia","Picozoa","Katablepharidophyta","Hacrobia-Other","Amoebozoa","Discoba","Metamonada","Archaeplastida-Chlorophyta","Archaeplastida-Other","Unassigned-Eukaryote","Basidiomycota","Chytridiomycota","Opisthokonta-Fungi","Ascomycota","Microsporidiomycota","Cnidaria","Craniata","Mollusca","Nemertea","Opisthokonta-Metazoa","Annelida","Arthropoda","Rotifera","Urochordata","Nematoda","Ctenophora","Echinodermata","Platyhelminthes","Porifera","Gastrotricha","Opisthokonta-Other","Choanoflagellatea","Ichthyosporea","Filasterea")
color_order <- c("#7f0000","#b30000","#d7301f","#ef6548","#fc8d59","#fdbb84","#fdd49e","#fee8c8","#deebf7","#9ecae1","#6baed6","#2171b5","#08519c","#08306b","#ffffff","#800026","#bd0026","#e31a1c","#fc4e2a","#fd8d3c","#feb24c","#fed976","#ffeda0","#ffffcc","#fec44f","#fe9929","#ec7014","#cc4c02","#993404","#662506","#e5f5e0","#a1d99b","#74c476","#41ab5d","#238b45","#006d2c","#00441b","#edf8b1","#7fcdbb","#1d91c0","#4292c6","#08519c","#f0f0f0","#4d004b","#810f7c","#88419d","#8c6bb1","#8c96c6","#9ebcda","#bfd3e6","#e0ecf4","#f7fcfd","#67001f","#980043","#ce1256","#e7298a","#df65b0","#c994c7","#d4b9da","#e7e1ef","#014636","#016c59","#02818a","#3690c0","#67a9cf","#a6bddb","#d0d1e6")
gr_tax_res_toplot$TAXORDER <- factor(gr_tax_res_toplot$Taxa2, levels = tax_order)
names(color_order) <- tax_order
hets_plot <- ggplot(gr_tax_res_toplot, aes(x = SAMPLENAME, y = SUM, fill = TAXORDER)) +
geom_bar(stat = "identity", position = "fill", color = "black") +
scale_fill_manual(values = color_order) +
scale_y_continuous(expand = c(0,0))+
theme(legend.position = "right",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x = element_text(angle=90, hjust=1, vjust=0.5, color = "black", face="bold", size = 12),
axis.text.y = element_text(color="black", face="bold", size = 12),
strip.text.x = element_blank(),
strip.background = element_blank(),
legend.title = element_blank()) +
labs(x="", y="Relative abundance")+
# guides(fill = guide_legend(ncol = 1)) +
facet_grid(RES_COSMO~LOCATION_ORDER, space = "free", scales = "free")metaz <- c("Opisthokonta-Fungi", "Opisthokonta-Other", "Opisthokonta-Metazoa")
# hets_plot %+% subset(gr_tax_res_toplot, !(RES_COSMO == "Other") & (!(Taxa %in% metaz)) & (!(Taxa == "Alveolata-Ciliates")))# svg("Supplementary-metazoa-plot.svg", h = 8, w = 10)
hets_plot %+% subset(gr_tax_res_toplot, !(RES_COSMO == "Other") & (Taxa %in% metaz))# options(repr.plot.width = 18, repr.plot.height = 8)
# svg("tax-res-withdist-GR.svg", w = 18, h = 8)
# ciliate_key <- get_legend(ciliate_plot)
# other_key <- get_legend(hets_plot %+% subset(gr_tax_res_toplot, !(RES_COSMO == "Other") & (!(Taxa %in% metaz)) & (!(Taxa == "Alveolata-Ciliates"))))
# plot_grid(ciliate_plot + theme(legend.position = "none"),
# ciliate_key,
# hets_plot %+% subset(gr_tax_res_toplot, !(RES_COSMO == "Other") & (!(Taxa %in% metaz)) & (!(Taxa == "Alveolata-Ciliates"))) + theme(legend.position = "none"),
# other_key,
# ncol = 4, labels = c("a","", "b", ""), rel_widths = c(1, 0.2, 1, 0.2))
# dev.off()## RES_COSMO SAMPLE SAMPLEID Sampletype LocationName
## 1 Cosmopolitan Candelabra Vent-Grazing-T24 T24 Grazing Candelabra Vent
## 2 Cosmopolitan Candelabra Vent-Grazing-T24 T24 Grazing Candelabra Vent
## 3 Cosmopolitan Candelabra Vent-Grazing-T24 T24 Grazing Candelabra Vent
## 4 Cosmopolitan Candelabra Vent-Grazing-T24 T24 Grazing Candelabra Vent
## 5 Cosmopolitan Candelabra Vent-Grazing-T24 T24 Grazing Candelabra Vent
## 6 Cosmopolitan Candelabra Vent-Grazing-T24 T24 Grazing Candelabra Vent
## Broad_Taxa Taxa Taxa2 SUM
## 1 Alveolata Alveolata-Dinoflagellates Alveolata-Dinoflagellates 1880
## 2 Alveolata Alveolata-Dinoflagellates Gymnodiniales 294
## 3 Alveolata Alveolata-Dinoflagellates Peridiniales 36
## 4 Alveolata Alveolata-Dinoflagellates Prorocentrales 37
## 5 Alveolata Alveolata-Syndiniales Dino-Group-I 315
## 6 Alveolata Alveolata-Syndiniales Dino-Group-II 199
## SAMPLENAME SAMPLEID_ORDER LOCATION_ORDER CILIATE_ORDER
## 1 Candelabra Vent T26 T24 Candelabra Vent <NA>
## 2 Candelabra Vent T26 T24 Candelabra Vent <NA>
## 3 Candelabra Vent T26 T24 Candelabra Vent <NA>
## 4 Candelabra Vent T26 T24 Candelabra Vent <NA>
## 5 Candelabra Vent T26 T24 Candelabra Vent <NA>
## 6 Candelabra Vent T26 T24 Candelabra Vent <NA>
## TAXORDER
## 1 Alveolata-Dinoflagellates
## 2 Gymnodiniales
## 3 Peridiniales
## 4 Prorocentrales
## 5 Dino-Group-I
## 6 Dino-Group-II
gr_relAbun_toheat <- gr_tax_res_toplot %>%
# filter(Taxa %in% "Alveolata-Ciliates") %>%
filter(!(RES_COSMO == "Other")) %>%
#Determine relative abundance within samples
group_by(Broad_Taxa, SAMPLENAME) %>%
mutate(SUMTOTAL = sum(SUM),
RelAbun = 100*(SUM/SUMTOTAL)) %>%
data.frame
# Re-factor
tax2_order_all <- c("Heterotrichea","Oligohymenophorea","Litostomatea","Karyorelictea","Nassophorea","Phyllopharyngea","Plagiopylea","CONThreeP","Spirotrichea-Choreotrichida","Spirotrichea-Euplotia","Spirotrichea-Hypotrichia","Spirotrichea-Strombidiida","Spirotrichea-Tintinnida","Spirotrichea-Other","Alveolata-Ciliates","Prorocentrales","Gymnodiniales","Peridiniales","Gonyaulacales","Torodiniales","Suessiales","Noctilucales","Alveolata-Dinoflagellates","Dino-Group-I","Dino-Group-II","Dino-Group-IV","Dino-Group-III","Dino-Group-V","Alveolata-Syndiniales","Alveolata-Other","Rhizaria-Cercozoa","Rhizaria-Radiolaria-Acantharea","Rhizaria-Radiolaria-Polycystinea","Rhizaria-Radiolaria","Rhizaria-Other","Stramenopiles-MAST","Bacillariophyta","Dictyochophyceae","Chrysophyceae","MOCH","Bolidophyceae","Pelagophyceae","Synurophyceae","Stramenopiles-Ochrophyta","Stramenopiles-Other","Hacrobia-Cryptophyta","Hacrobia-Haptophyta","Centroheliozoa","Telonemia","Picozoa","Katablepharidophyta","Hacrobia-Other","Amoebozoa","Discoba","Metamonada","Archaeplastida-Chlorophyta","Archaeplastida-Other","Unassigned-Eukaryote","Basidiomycota","Chytridiomycota","Opisthokonta-Fungi","Ascomycota","Microsporidiomycota","Cnidaria","Craniata","Mollusca","Nemertea","Opisthokonta-Metazoa","Annelida","Arthropoda","Rotifera","Urochordata","Nematoda","Ctenophora","Echinodermata","Platyhelminthes","Porifera","Gastrotricha","Opisthokonta-Other","Choanoflagellatea","Ichthyosporea","Filasterea")
gr_relAbun_toheat$TAXA2_ORDER <- factor(gr_relAbun_toheat$Taxa2, levels = tax2_order_all)
gr_relAbun_toheat$Broad_ORDER <- factor(gr_relAbun_toheat$Broad_Taxa, levels = c("Alveolata-Ciliates", "Alveolata", "Rhizaria", "Stramenopiles", "Hacrobia", "Amoebozoa", "Excavata", "Archaeplastida", "Opisthokonta", "Unassigned-Eukaryote"))
broad_color <- c("#ae017e","#bd0026","#ec7014","#238b45","#016c59","#08519c","#54278f","#810f7c") # Make geom tile plot
tile_tax <- ggplot(gr_relAbun_toheat, aes(x = SAMPLENAME, alpha = RelAbun, fill = Broad_ORDER, y = Taxa2)) +
geom_tile(color = "white") +
scale_fill_manual(values = broad_color) +
theme(legend.position = "right",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x = element_text(angle=90, hjust=1, vjust=0.5, color = "black", size = 8),
axis.text.y = element_text(color="black", size = 8),
strip.background = element_blank(),
legend.title = element_blank()) +
labs(x="", y="")+
facet_grid(Broad_ORDER~RES_COSMO, space = "free", scales = "free")
metaz <- c("Opisthokonta-Fungi", "Opisthokonta-Other", "Opisthokonta-Metazoa")
tile_tax %+% subset(gr_relAbun_toheat, !(Taxa %in% metaz | Taxa == "Unassigned-Eukaryote"))gr_tax_res_richness$Broad_ORDER <- factor(gr_tax_res_richness$Broad_Taxa, levels = c("Alveolata-Ciliates", "Alveolata", "Rhizaria", "Stramenopiles", "Hacrobia", "Amoebozoa", "Excavata", "Archaeplastida", "Opisthokonta", "Unassigned-Eukaryote"))
# head(gr_tax_res_richness)
# bubble plot richness
richness_df <- gr_tax_res_richness %>%
filter(!(RES_COSMO == "Other")) %>%
filter(!(Taxa %in% metaz | Taxa == "Unassigned-Eukaryote")) %>%
data.frame
#
richness_plot <- ggplot(richness_df, aes(x = RES_COSMO, y = Taxa2)) +
geom_point(aes(size = RICHNESS)) +
scale_size_continuous(range = c(0.2, 3)) +
facet_grid(Broad_ORDER~RES_COSMO, space = "free", scales = "free") +
theme(legend.position = "right",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x = element_text(angle = 90, size = 8),
axis.text.y = element_text(size = 8),
strip.text = element_blank(),
strip.background = element_blank(),
legend.title = element_blank()) +
labs(x="", y="")tile <- get_legend(tile_tax %+% subset(gr_relAbun_toheat, !(Taxa %in% metaz | Taxa == "Unassigned-Eukaryote")))
rich <- get_legend(richness_plot)
# svg("tile_bubble_plot.svg", w = 22, h = 10)
plot_grid(tile_tax %+% subset(gr_relAbun_toheat, !(Taxa %in% metaz | Taxa == "Unassigned-Eukaryote")) + theme(legend.position = "none"),
richness_plot + theme(legend.position = "none"),
tile, rich, ncol = 4,
axis = "b")tax_order <- c("Prorocentrales","Gymnodiniales","Peridiniales","Gonyaulacales","Torodiniales","Suessiales","Noctilucales","Alveolata-Dinoflagellates","Dino-Group-I","Dino-Group-II","Dino-Group-IV","Dino-Group-III","Dino-Group-V","Alveolata-Syndiniales","Alveolata-Other","Rhizaria-Cercozoa","Rhizaria-Radiolaria-Acantharea","Rhizaria-Radiolaria-Polycystinea","Rhizaria-Radiolaria","Rhizaria-Other","Stramenopiles-MAST","Bacillariophyta","Dictyochophyceae","Chrysophyceae","MOCH","Bolidophyceae","Pelagophyceae","Synurophyceae","Stramenopiles-Ochrophyta","Stramenopiles-Other","Hacrobia-Cryptophyta","Hacrobia-Haptophyta","Centroheliozoa","Telonemia","Picozoa","Katablepharidophyta","Hacrobia-Other","Amoebozoa","Discoba","Metamonada","Archaeplastida-Chlorophyta","Archaeplastida-Other","Unassigned-Eukaryote","Basidiomycota","Chytridiomycota","Opisthokonta-Fungi","Ascomycota","Microsporidiomycota","Cnidaria","Craniata","Mollusca","Nemertea","Opisthokonta-Metazoa","Annelida","Arthropoda","Rotifera","Urochordata","Nematoda","Ctenophora","Echinodermata","Platyhelminthes","Porifera","Gastrotricha","Opisthokonta-Other","Choanoflagellatea","Ichthyosporea","Filasterea")
color_order <- c("#7f0000","#b30000","#d7301f","#ef6548","#fc8d59","#fdbb84","#fdd49e","#fee8c8","#deebf7","#9ecae1","#6baed6","#2171b5","#08519c","#08306b","#ffffff","#800026","#bd0026","#e31a1c","#fc4e2a","#fd8d3c","#feb24c","#fed976","#ffeda0","#ffffcc","#fec44f","#fe9929","#ec7014","#cc4c02","#993404","#662506","#e5f5e0","#a1d99b","#74c476","#41ab5d","#238b45","#006d2c","#00441b","#edf8b1","#7fcdbb","#1d91c0","#4292c6","#08519c","#f0f0f0","#4d004b","#810f7c","#88419d","#8c6bb1","#8c96c6","#9ebcda","#bfd3e6","#e0ecf4","#f7fcfd","#67001f","#980043","#ce1256","#e7298a","#df65b0","#c994c7","#d4b9da","#e7e1ef","#014636","#016c59","#02818a","#3690c0","#67a9cf","#a6bddb","#d0d1e6")
metaz <- c("Opisthokonta-Metazoa", "Opisthokonta-Fungi", "Opisthokonta-Other")
supp_plot <- ggplot(gr_tax_res_toplot, aes(x = SAMPLENAME, y = SUM, fill = Taxa2)) +
geom_bar(stat = "identity", position = "fill", color = "black") +
scale_fill_manual(values = color_order) +
scale_y_continuous(expand = c(0,0))+
theme(legend.position = "right",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x = element_text(angle=90, hjust=1, vjust=0.5, color = "black", face="bold", size = 12),
axis.text.y = element_text(color="black", face="bold", size = 12),
strip.text.x = element_blank(),
strip.background = element_blank(),
legend.title = element_blank()) +
labs(x="", y="Relative abundance")+
facet_grid(RES_COSMO~LOCATION_ORDER, space = "free", scales = "free")
supp_plot %+% subset(gr_tax_res_toplot, !(RES_COSMO == "Other") & Taxa %in% metaz)## Feature.ID SAMPLE SAMPLEID
## 1 0009645516609bda2246e1955ff9ec1d Shallow seawater-in situ-sterivex sterivex
## 2 0030ad8ce44f257c42daf3673bf92197 Shallow seawater-in situ-sterivex sterivex
## 3 0030ad8ce44f257c42daf3673bf92197 Venti Latte Vent-in situ-SUPR SUPR
## 4 0030ad8ce44f257c42daf3673bf92197 Candelabra Vent-in situ-SUPR SUPR
## 5 0030ad8ce44f257c42daf3673bf92197 SirVentsAlot Vent-Grazing-T24 T24
## 6 0038478be7fb4f097ce93a5e9341af2a Deep seawater-in situ-sterivex sterivex
## Sampletype LOCATION_SPECIFIC LocationName
## 1 in situ BSW081 Shallow seawater
## 2 in situ BSW081 Shallow seawater
## 3 in situ Vent040 Venti Latte Vent
## 4 in situ Vent088 Candelabra Vent
## 5 Grazing Vent110 SirVentsAlot Vent
## 6 in situ BSW056 Deep seawater
## Taxon_updated
## 1 Eukaryota;Rhizaria;Radiolaria;Acantharea;Acantharea-Group-II;Acantharea-Group-II_X;Acantharea-Group-II_XX;Acantharea-Group-II_XX_sp.
## 2 Eukaryota;Stramenopiles;Opalozoa;MAST-3;MAST-3J;MAST-3J_X;MAST-3J_XX;MAST-3J_XX_sp.
## 3 Eukaryota;Stramenopiles;Opalozoa;MAST-3;MAST-3J;MAST-3J_X;MAST-3J_XX;MAST-3J_XX_sp.
## 4 Eukaryota;Stramenopiles;Opalozoa;MAST-3;MAST-3J;MAST-3J_X;MAST-3J_XX;MAST-3J_XX_sp.
## 5 Eukaryota;Stramenopiles;Opalozoa;MAST-3;MAST-3J;MAST-3J_X;MAST-3J_XX;MAST-3J_XX_sp.
## 6 Eukaryota;Opisthokonta;Metazoa;Cnidaria;Cnidaria_X;Hydrozoa;Aglaura;Aglaura_hemistoma
## Kingdom Supergroup Division Class Order
## 1 Eukaryota Rhizaria Radiolaria Acantharea Acantharea-Group-II
## 2 Eukaryota Stramenopiles Opalozoa MAST-3 MAST-3J
## 3 Eukaryota Stramenopiles Opalozoa MAST-3 MAST-3J
## 4 Eukaryota Stramenopiles Opalozoa MAST-3 MAST-3J
## 5 Eukaryota Stramenopiles Opalozoa MAST-3 MAST-3J
## 6 Eukaryota Opisthokonta Metazoa Cnidaria Cnidaria_X
## Family Genus Species
## 1 Acantharea-Group-II_X Acantharea-Group-II_XX Acantharea-Group-II_XX_sp.
## 2 MAST-3J_X MAST-3J_XX MAST-3J_XX_sp.
## 3 MAST-3J_X MAST-3J_XX MAST-3J_XX_sp.
## 4 MAST-3J_X MAST-3J_XX MAST-3J_XX_sp.
## 5 MAST-3J_X MAST-3J_XX MAST-3J_XX_sp.
## 6 Hydrozoa Aglaura Aglaura_hemistoma
## Taxa COUNT_AVG Taxa2 Broad_Taxa
## 1 Rhizaria-Radiolaria 80 Acantharea-Group-II Rhizaria
## 2 Stramenopiles-MAST 36 MAST-3 Stramenopiles
## 3 Stramenopiles-MAST 12 MAST-3 Stramenopiles
## 4 Stramenopiles-MAST 34 MAST-3 Stramenopiles
## 5 Stramenopiles-MAST 15 MAST-3 Stramenopiles
## 6 Opisthokonta-Metazoa 21 Cnidaria Opisthokonta
## category_final RES_COSMO
## 1 Unique to sample Other
## 2 ASV appears in BSW and throughout vent/plume Cosmopolitan
## 3 ASV appears in BSW and throughout vent/plume Cosmopolitan
## 4 ASV appears in BSW and throughout vent/plume Cosmopolitan
## 5 ASV appears in BSW and throughout vent/plume Cosmopolitan
## 6 ASV appears in BSW and throughout vent/plume Cosmopolitan
sirvents <- gr_tax_res %>%
filter(LocationName == "SirVentsAlot Vent") %>%
# sum by like taxa
group_by(LOCATION_SPECIFIC, RES_COSMO, SAMPLE, SAMPLEID, Sampletype, LocationName, Broad_Taxa, Taxa, Taxa2) %>%
summarise(SUM = sum(COUNT_AVG)) %>%
filter(!(RES_COSMO == "Other")) %>%
#Determine relative abundance within samples
group_by(Broad_Taxa, LOCATION_SPECIFIC, SAMPLE) %>%
mutate(SUMTOTAL = sum(SUM),
RelAbun = 100*(SUM/SUMTOTAL)) %>%
unite(SAMPLE_REPS, LOCATION_SPECIFIC, SAMPLE, sep = " ", remove = FALSE) %>%
data.frame## `summarise()` regrouping output by 'LOCATION_SPECIFIC', 'RES_COSMO', 'SAMPLE', 'SAMPLEID', 'Sampletype', 'LocationName', 'Broad_Taxa', 'Taxa' (override with `.groups` argument)
## SAMPLE_REPS LOCATION_SPECIFIC RES_COSMO
## 1 Vent105 SirVentsAlot Vent-in situ-SUPR Vent105 Cosmopolitan
## 2 Vent105 SirVentsAlot Vent-in situ-SUPR Vent105 Cosmopolitan
## 3 Vent105 SirVentsAlot Vent-in situ-SUPR Vent105 Cosmopolitan
## 4 Vent105 SirVentsAlot Vent-in situ-SUPR Vent105 Cosmopolitan
## 5 Vent105 SirVentsAlot Vent-in situ-SUPR Vent105 Cosmopolitan
## 6 Vent105 SirVentsAlot Vent-in situ-SUPR Vent105 Cosmopolitan
## SAMPLE SAMPLEID Sampletype LocationName
## 1 SirVentsAlot Vent-in situ-SUPR SUPR in situ SirVentsAlot Vent
## 2 SirVentsAlot Vent-in situ-SUPR SUPR in situ SirVentsAlot Vent
## 3 SirVentsAlot Vent-in situ-SUPR SUPR in situ SirVentsAlot Vent
## 4 SirVentsAlot Vent-in situ-SUPR SUPR in situ SirVentsAlot Vent
## 5 SirVentsAlot Vent-in situ-SUPR SUPR in situ SirVentsAlot Vent
## 6 SirVentsAlot Vent-in situ-SUPR SUPR in situ SirVentsAlot Vent
## Broad_Taxa Taxa Taxa2 SUM SUMTOTAL
## 1 Alveolata Alveolata-Dinoflagellates Alveolata-Dinoflagellates 2882 6857
## 2 Alveolata Alveolata-Dinoflagellates Gymnodiniales 1686 6857
## 3 Alveolata Alveolata-Dinoflagellates Peridiniales 171 6857
## 4 Alveolata Alveolata-Dinoflagellates Prorocentrales 99 6857
## 5 Alveolata Alveolata-Dinoflagellates Torodiniales 134 6857
## 6 Alveolata Alveolata-Syndiniales Dino-Group-I 354 6857
## RelAbun
## 1 42.030042
## 2 24.588012
## 3 2.493802
## 4 1.443780
## 5 1.954207
## 6 5.162608
# Factoring
sirvents$SAMPLENAME<-factor(sirvents$SAMPLE, levels = sample_order_all, labels = sample_name_all)
sirvents$SAMPLEID_ORDER <- factor(sirvents$SAMPLEID, levels = c("sterivex", "SUPR", "T0", "T24", "T36"))
# Re-factor
tax2_order_all <- c("Heterotrichea","Oligohymenophorea","Litostomatea","Karyorelictea","Nassophorea","Phyllopharyngea","Plagiopylea","CONThreeP","Spirotrichea-Choreotrichida","Spirotrichea-Euplotia","Spirotrichea-Hypotrichia","Spirotrichea-Strombidiida","Spirotrichea-Tintinnida","Spirotrichea-Other","Alveolata-Ciliates","Prorocentrales","Gymnodiniales","Peridiniales","Gonyaulacales","Torodiniales","Suessiales","Noctilucales","Alveolata-Dinoflagellates","Dino-Group-I","Dino-Group-II","Dino-Group-IV","Dino-Group-III","Dino-Group-V","Alveolata-Syndiniales","Alveolata-Other","Rhizaria-Cercozoa","Rhizaria-Radiolaria-Acantharea","Rhizaria-Radiolaria-Polycystinea","Rhizaria-Radiolaria","Rhizaria-Other","Stramenopiles-MAST","Bacillariophyta","Dictyochophyceae","Chrysophyceae","MOCH","Bolidophyceae","Pelagophyceae","Synurophyceae","Stramenopiles-Ochrophyta","Stramenopiles-Other","Hacrobia-Cryptophyta","Hacrobia-Haptophyta","Centroheliozoa","Telonemia","Picozoa","Katablepharidophyta","Hacrobia-Other","Amoebozoa","Discoba","Metamonada","Archaeplastida-Chlorophyta","Archaeplastida-Other","Unassigned-Eukaryote","Basidiomycota","Chytridiomycota","Opisthokonta-Fungi","Ascomycota","Microsporidiomycota","Cnidaria","Craniata","Mollusca","Nemertea","Opisthokonta-Metazoa","Annelida","Arthropoda","Rotifera","Urochordata","Nematoda","Ctenophora","Echinodermata","Platyhelminthes","Porifera","Gastrotricha","Opisthokonta-Other","Choanoflagellatea","Ichthyosporea","Filasterea")
sirvents$TAXA2_ORDER <- factor(sirvents$Taxa2, levels = tax2_order_all)
sirvents$Broad_ORDER <- factor(sirvents$Broad_Taxa, levels = c("Alveolata-Ciliates", "Alveolata", "Rhizaria", "Stramenopiles", "Hacrobia", "Amoebozoa", "Excavata", "Archaeplastida", "Opisthokonta", "Unassigned-Eukaryote"))
broad_color <- c("#ae017e","#bd0026","#ec7014","#238b45","#016c59","#08519c","#54278f","#810f7c")sirvents_plot <- ggplot(sirvents, aes(x = SAMPLE_REPS, alpha = RelAbun, fill = Broad_ORDER, y = Taxa2)) +
geom_tile(color = "white") +
scale_fill_manual(values = broad_color) +
theme(legend.position = "right",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x = element_text(angle=90, hjust=1, vjust=0.5, color = "black", size = 8),
axis.text.y = element_text(color="black", size = 8),
strip.background = element_blank(),
legend.title = element_blank()) +
labs(x="", y="")+
facet_grid(Broad_ORDER~RES_COSMO, space = "free", scales = "free")
metaz <- c("Opisthokonta-Fungi", "Opisthokonta-Other", "Opisthokonta-Metazoa")
sirvents_plot %+% subset(sirvents, !(Taxa == "Unassigned-Eukaryote"))# What are the most abundant ASVs in each of the taxa2 categories?
gr_topASV_taxa2 <- gr_tax_res %>%
select(Feature.ID, RES_COSMO, Taxa, Taxa2, Taxon_updated, COUNT_AVG) %>%
group_by(Feature.ID, RES_COSMO, Taxa, Taxa2, Taxon_updated) %>%
summarise(Total = sum(COUNT_AVG)) %>%
ungroup() %>%
group_by(Taxa, Taxa2) %>%
arrange(Taxa2, desc(Total)) %>%
top_n(10, Total) %>%
data.frame
# gr_topASV_taxa2
write_delim(gr_topASV_taxa2, path = "supptable-topASVs-taxa2.txt", delim = "\t")
# save(gr_tax_res, file = "GR-18S-ASV-list.RData")Use these ASVs downstream to explore hypotheses with correlation results
Format input 18S and 16S data, save for correlation analysis.
## Loading objects:
## gr_sorted
## asv_key_final
# head(gr_sorted)
# load 16S data
bac_wtax <- read.delim("bac-wtaxassigned-20-08-2020.txt")
# head(bac_wtax)# Sort and filter eukaryote ASVs to consider:
sumseq <- sum(gr_sorted$COUNT_AVG)
metaz <- c("Opisthokonta-Fungi", "Opisthokonta-Other", "Opisthokonta-Metazoa")
euk_data_interact <- gr_sorted %>%
filter(Sampletype == "in situ") %>% #select only in situ samples
select(Feature.ID, Taxon_updated, COUNT_AVG, LocationName) %>%
group_by(Feature.ID, Taxon_updated, LocationName) %>%
summarise(COUNT_TOTAL = sum(COUNT_AVG)) %>%
ungroup() %>%
# Calculate relative abundance
mutate(RelAbun = 100*(COUNT_TOTAL/sumseq)) %>%
# Remove ASVs ahead of network analysis
group_by(Feature.ID) %>%
filter(RelAbun > 0.001) %>%
mutate(sample_appear = n_distinct(LocationName)) %>% #Calculate how many times an ASV appears
filter(sample_appear > 4) %>% #ASV must appear in at least 3 samples
filter(COUNT_TOTAL >= 10) %>% #ASV must have at least 10 sequences
filter(!Taxon_updated == "Unassigned-Eukaryote") %>%
filter(!Taxon_updated %in% metaz) %>%
add_column(domain = "euk") %>%
unite(FEATURE, domain, Feature.ID, sep = "_", remove = TRUE) %>%
select(FEATURE, LocationName, Taxon_EUK = Taxon_updated, COUNT = COUNT_TOTAL) %>%
data.frame## `summarise()` regrouping output by 'Feature.ID', 'Taxon_updated' (override with `.groups` argument)
## [1] 9028
## [1] 207
sumseq <- sum(bac_wtax$AVG_count)
bac_data_interact <- bac_wtax %>%
filter(!(Tax_update == "Other")) %>% #Remove "other"
group_by(Feature.ID, Tax_update, LocationName) %>%
summarise(COUNT_TOTAL = sum(AVG_count)) %>%
ungroup() %>%
add_column(domain = "prok") %>%
# Calculate relative abundance
mutate(RelAbun = 100*(COUNT_TOTAL/sumseq)) %>%
# Remove ASVs ahead of network analysis
group_by(Feature.ID) %>%
filter(RelAbun > 0.001) %>%
mutate(sample_appear = n_distinct(LocationName)) %>% #Calculate how many times an ASV appears
filter(sample_appear > 3) %>% #ASV must appear in at least 3 samples
filter(COUNT_TOTAL >= 10) %>% #ASV must have at least 10 sequences
unite(FEATURE, domain, Feature.ID, sep = "_", remove = TRUE) %>%
select(FEATURE, LocationName, Taxon_BAC = Tax_update, COUNT = COUNT_TOTAL) %>%
data.frame## `summarise()` regrouping output by 'Feature.ID', 'Tax_update' (override with `.groups` argument)
## [1] 3650
## [1] 158
# save(euk_data_interact, bac_data_interact, file = "Filtered-correlation-R-objects-20-08-2020.RData")euk_df <- euk_data_interact %>%
pivot_wider(names_from = LocationName, values_from = COUNT, values_fill = 0) %>%
select(order(colnames(.))) %>%
data.frame
# head(euk_df)
euk_asv <- as.matrix(select(euk_df, -Taxon_EUK) %>% column_to_rownames(var = "FEATURE"))
euk_tax <- as.matrix(select(euk_df, FEATURE, Taxon_EUK) %>% column_to_rownames(var = "FEATURE"))
# head(bac_asv); head(bac_tax)
row.names(euk_asv) <- row.names(euk_tax)
# Phyloseq import
euk_asv_table <- otu_table(euk_asv, taxa_are_rows = TRUE)
euk_tax_table <- tax_table(euk_tax)
euk_phy <- phyloseq(euk_asv_table, euk_tax_table)
euk_phy## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 207 taxa and 7 samples ]
## tax_table() Taxonomy Table: [ 207 taxa by 1 taxonomic ranks ]
bac_df <- bac_data_interact %>%
pivot_wider(names_from = LocationName, values_from = COUNT, values_fill = 0) %>%
select(order(colnames(.))) %>%
data.frame
bac_asv <- as.matrix(select(bac_df, -Taxon_BAC) %>% column_to_rownames(var = "FEATURE"))
bac_tax <- as.matrix(select(bac_df, FEATURE, Taxon_BAC) %>% column_to_rownames(var = "FEATURE"))
# head(bac_asv); head(bac_tax)
row.names(bac_asv) <- row.names(bac_tax)
# Phyloseq import
bac_asv_table <- otu_table(bac_asv, taxa_are_rows = TRUE)
bac_tax_table <- tax_table(bac_tax)
bac_phy <- phyloseq(bac_asv_table, bac_tax_table)
bac_phy## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 158 taxa and 7 samples ]
## tax_table() Taxonomy Table: [ 158 taxa by 1 taxonomic ranks ]
# Repeat phyloseq objects, but make small ones for testing spieceasi
bac_df_tmp <- bac_data_interact %>%
pivot_wider(names_from = LocationName, values_from = COUNT, values_fill = 0) %>%
sample_n(50) %>%
select(order(colnames(.))) %>%
data.frame
bac_asv <- as.matrix(select(bac_df_tmp, -Taxon_BAC) %>% column_to_rownames(var = "FEATURE"))
bac_tax <- as.matrix(select(bac_df_tmp, FEATURE, Taxon_BAC) %>% column_to_rownames(var = "FEATURE"))
# head(bac_asv); head(bac_tax)
row.names(bac_asv) <- row.names(bac_tax)
# Phyloseq import
bac_asv_table <- otu_table(bac_asv, taxa_are_rows = TRUE)
bac_tax_table <- tax_table(bac_tax)
bac_phy_tmp <- phyloseq(bac_asv_table, bac_tax_table)
euk_df_tmp <- euk_data_interact %>%
pivot_wider(names_from = LocationName, values_from = COUNT, values_fill = 0) %>%
sample_n(50) %>%
select(order(colnames(.))) %>%
data.frame
# head(euk_df)
euk_asv <- as.matrix(select(euk_df_tmp, -Taxon_EUK) %>% column_to_rownames(var = "FEATURE"))
euk_tax <- as.matrix(select(euk_df_tmp, FEATURE, Taxon_EUK) %>% column_to_rownames(var = "FEATURE"))
# head(bac_asv); head(bac_tax)
row.names(euk_asv) <- row.names(euk_tax)
# Phyloseq import
euk_asv_table <- otu_table(euk_asv, taxa_are_rows = TRUE)
euk_tax_table <- tax_table(euk_tax)
euk_phy_tmp <- phyloseq(euk_asv_table, euk_tax_table)
euk_phy_tmp## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 50 taxa and 7 samples ]
## tax_table() Taxonomy Table: [ 50 taxa by 1 taxonomic ranks ]
See slurm script right now, requires HPC.
Transform into dataframes to look at relationship of pairs
## Loading objects:
## df_adj
## df_beta
## se_GR
## Feature.ID SAMPLE SAMPLEID
## 1 0009645516609bda2246e1955ff9ec1d Shallow seawater-in situ-sterivex sterivex
## 2 0030ad8ce44f257c42daf3673bf92197 Shallow seawater-in situ-sterivex sterivex
## 3 0030ad8ce44f257c42daf3673bf92197 Venti Latte Vent-in situ-SUPR SUPR
## 4 0030ad8ce44f257c42daf3673bf92197 Candelabra Vent-in situ-SUPR SUPR
## 5 0030ad8ce44f257c42daf3673bf92197 SirVentsAlot Vent-Grazing-T24 T24
## 6 0038478be7fb4f097ce93a5e9341af2a Deep seawater-in situ-sterivex sterivex
## Sampletype LOCATION_SPECIFIC LocationName
## 1 in situ BSW081 Shallow seawater
## 2 in situ BSW081 Shallow seawater
## 3 in situ Vent040 Venti Latte Vent
## 4 in situ Vent088 Candelabra Vent
## 5 Grazing Vent110 SirVentsAlot Vent
## 6 in situ BSW056 Deep seawater
## Taxon_updated
## 1 Eukaryota;Rhizaria;Radiolaria;Acantharea;Acantharea-Group-II;Acantharea-Group-II_X;Acantharea-Group-II_XX;Acantharea-Group-II_XX_sp.
## 2 Eukaryota;Stramenopiles;Opalozoa;MAST-3;MAST-3J;MAST-3J_X;MAST-3J_XX;MAST-3J_XX_sp.
## 3 Eukaryota;Stramenopiles;Opalozoa;MAST-3;MAST-3J;MAST-3J_X;MAST-3J_XX;MAST-3J_XX_sp.
## 4 Eukaryota;Stramenopiles;Opalozoa;MAST-3;MAST-3J;MAST-3J_X;MAST-3J_XX;MAST-3J_XX_sp.
## 5 Eukaryota;Stramenopiles;Opalozoa;MAST-3;MAST-3J;MAST-3J_X;MAST-3J_XX;MAST-3J_XX_sp.
## 6 Eukaryota;Opisthokonta;Metazoa;Cnidaria;Cnidaria_X;Hydrozoa;Aglaura;Aglaura_hemistoma
## Kingdom Supergroup Division Class Order
## 1 Eukaryota Rhizaria Radiolaria Acantharea Acantharea-Group-II
## 2 Eukaryota Stramenopiles Opalozoa MAST-3 MAST-3J
## 3 Eukaryota Stramenopiles Opalozoa MAST-3 MAST-3J
## 4 Eukaryota Stramenopiles Opalozoa MAST-3 MAST-3J
## 5 Eukaryota Stramenopiles Opalozoa MAST-3 MAST-3J
## 6 Eukaryota Opisthokonta Metazoa Cnidaria Cnidaria_X
## Family Genus Species
## 1 Acantharea-Group-II_X Acantharea-Group-II_XX Acantharea-Group-II_XX_sp.
## 2 MAST-3J_X MAST-3J_XX MAST-3J_XX_sp.
## 3 MAST-3J_X MAST-3J_XX MAST-3J_XX_sp.
## 4 MAST-3J_X MAST-3J_XX MAST-3J_XX_sp.
## 5 MAST-3J_X MAST-3J_XX MAST-3J_XX_sp.
## 6 Hydrozoa Aglaura Aglaura_hemistoma
## Taxa COUNT_AVG Taxa2 Broad_Taxa
## 1 Rhizaria-Radiolaria 80 Acantharea-Group-II Rhizaria
## 2 Stramenopiles-MAST 36 MAST-3 Stramenopiles
## 3 Stramenopiles-MAST 12 MAST-3 Stramenopiles
## 4 Stramenopiles-MAST 34 MAST-3 Stramenopiles
## 5 Stramenopiles-MAST 15 MAST-3 Stramenopiles
## 6 Opisthokonta-Metazoa 21 Cnidaria Opisthokonta
## category_final RES_COSMO
## 1 Unique to sample Other
## 2 ASV appears in BSW and throughout vent/plume Cosmopolitan
## 3 ASV appears in BSW and throughout vent/plume Cosmopolitan
## 4 ASV appears in BSW and throughout vent/plume Cosmopolitan
## 5 ASV appears in BSW and throughout vent/plume Cosmopolitan
## 6 ASV appears in BSW and throughout vent/plume Cosmopolitan
countbac <- read.delim("data-input/CountTable-wtax-16s-plus3-2020-06-23.txt")
# colnames(countbac)
bac_data_interact_fulltax <- bac_data_interact %>%
select(FEATURE, TAX_SHORT = Taxon_BAC) %>%
separate(FEATURE, c("domain", "Feature.ID"), sep = "_", remove = FALSE) %>%
left_join(select(countbac, Feature.ID, TAX_FULL = Taxon)) %>%
select(FEATURE, TAX_FULL, TAX_SHORT) %>%
data.frame## Joining, by = "Feature.ID"
# head(bac_data_interact_fulltax)
# head(gr_tax_res)
# Make taxonomy key
tax_key_se <- euk_data_interact %>%
select(FEATURE, TAX_FULL = Taxon_EUK) %>%
separate(FEATURE, c("domain", "Feature.ID"), sep = "_", remove = FALSE) %>%
left_join(select(gr_tax_res, Feature.ID, TAX_SHORT = Taxa, EUK_2 = Taxa2, EUK_DIST = RES_COSMO, Broad_Taxa)) %>%
select(FEATURE, TAX_FULL, TAX_SHORT, EUK_2, EUK_DIST, Broad_Taxa) %>%
bind_rows(bac_data_interact_fulltax) %>%
distinct() %>%
data.frame## Joining, by = "Feature.ID"
reformat_spieceasi <- function(df_in){
interaction <- c("PROK-EUK", "EUK-PROK")
df_in %>%
rownames_to_column(var = "SIDEA") %>%
pivot_longer(cols = starts_with(c("prok", "euk")), names_to = "SIDEB") %>%
mutate(domain_a = case_when(
grepl("prok", SIDEA) ~ "PROK",
grepl("euk", SIDEA) ~ "EUK"),
domain_b = case_when(
grepl("prok", SIDEB) ~ "PROK",
grepl("euk", SIDEB) ~ "EUK")) %>%
mutate(COMBO = paste(domain_a, domain_b, sep = "-")) %>%
mutate(COMBO_TYPE = case_when(
COMBO %in% interaction ~ "cross",
TRUE ~ "same"),
SIG_ID = paste(SIDEA, SIDEB, sep ="-")) %>%
select(-starts_with("domain")) %>%
left_join(select(tax_key_se, TAX_SIDEA = TAX_FULL, everything()), by = c("SIDEA" = "FEATURE")) %>%
left_join(select(tax_key_se, TAX_SIDEB = TAX_FULL, everything()), by = c("SIDEB" = "FEATURE"), suffix = c(".A", ".B")) %>%
data.frame
}
df_adj_long <- reformat_spieceasi(df_adj)
df_beta_long <- reformat_spieceasi(df_beta)Evaluate the range of weighted outputs from SpiecEasi. Determine if a threshold can be set.
# Get list of these parameters
# Adjacency matrix - binary, where 1 = significant interaction
# Boot strapped pvalue, showing weight of each correlation
adj_sig <- df_adj_long %>%
filter(value == 1) %>%
filter(COMBO_TYPE == "cross") %>%
select(everything(), Adjacency = value) %>%
left_join(select(df_beta_long, SIG_ID, Weight = value)) %>%
data.frame## Joining, by = "SIG_ID"
## [1] "SIDEA" "SIDEB" "Adjacency" "COMBO" "COMBO_TYPE"
## [6] "SIG_ID" "TAX_SIDEA" "TAX_SHORT.A" "EUK_2.A" "EUK_DIST.A"
## [11] "Broad_Taxa.A" "TAX_SIDEB" "TAX_SHORT.B" "EUK_2.B" "EUK_DIST.B"
## [16] "Broad_Taxa.B" "Weight"
## [1] 1004 17
## [1] 133225 16
# Isolate the unique interactions and make a table for export
complete_list <- adj_sig %>%
filter(COMBO == "EUK-PROK") %>%
separate(SIDEA, c("domain", "ASV_18S"), sep = "_") %>%
separate(SIDEB, c("domain2", "ASV_16S"), sep = "_") %>%
select(-starts_with("domain"), -COMBO, -COMBO_TYPE, -SIG_ID, TAX_18S = TAX_SIDEA, TAX_16S = TAX_SIDEB) %>%
data.frame
# View(complete_list)
# write_delim(complete_list, path = "Complete-cross-domain-interactions.txt", delim = "\t")Of the interactions between 18S- and 16S-derived data, we are interested in capturing the putative predator prey relationships
tax_sum_interact <- adj_sig %>%
filter(COMBO == "EUK-PROK") %>%
separate(SIDEA, c("domain", "ASV_18S"), sep = "_") %>%
separate(SIDEB, c("domain2", "ASV_16S"), sep = "_") %>%
select(-starts_with("domain"), -COMBO, -COMBO_TYPE, -SIG_ID, -Adjacency) %>%
unite(INTERACTION, TAX_SHORT.A, TAX_SHORT.B, sep = "_", remove = FALSE) %>%
add_column(COUNT = 1) %>%
data.frame
# View(tax_sum_interact)
length(unique(tax_sum_interact$INTERACTION)) #Total significant interactions between euk and bac## [1] 146
## [1] "Rhizaria-Radiolaria" "Alveolata-Syndiniales"
## [3] "Stramenopiles-Other" "Alveolata-Dinoflagellates"
## [5] "Stramenopiles-MAST" "Hacrobia-Haptophyta"
## [7] "Hacrobia-Other" "Stramenopiles-Ochrophyta"
## [9] "Alveolata-Ciliates" "Opisthokonta-Other"
## [11] "Archaeplastida-Chlorophyta" "Opisthokonta-Metazoa"
## [13] "Rhizaria-Cercozoa" "Hacrobia-Cryptophyta"
## [15] "Unassigned-Eukaryote"
# Table of significant interactions
summary_sig_interactions <- tax_sum_interact %>%
select(ASV_18S, ASV_16S, TAX_SHORT.A, COUNT) %>%
distinct() %>%
group_by(TAX_SHORT.A) %>%
summarise(COUNT_18S_ASVs = n_distinct(ASV_18S)) %>%
data.frame## `summarise()` ungrouping output (override with `.groups` argument)
library(ggalluvial)
# head(tax_sum_interact)
putative_prey <- tax_sum_interact %>%
filter(!(Broad_Taxa.A == "Unassigned-Eukaryote")) %>%
group_by(Broad_Taxa.A, TAX_SHORT.A, TAX_SHORT.B) %>%
summarise(count_sum = sum(COUNT)) %>%
data.frame## `summarise()` regrouping output by 'Broad_Taxa.A', 'TAX_SHORT.A' (override with `.groups` argument)
putative_prey$BROAD_TAXA_ORDER <- factor(putative_prey$Broad_Taxa.A, levels = c("Alveolata-Ciliates", "Alveolata", "Rhizaria", "Stramenopiles", "Hacrobia", "Amoebozoa", "Excavata", "Archaeplastida", "Opisthokonta", "Unassigned-Eukaryote"))
broad_color <- c("#ae017e","#bd0026","#ec7014","#238b45","#016c59","#08519c","#54278f","#810f7c")
# head(putative_prey)# svg("18s-16s-alluvial-interaction.svg", h = 7, w = 9)
ggplot(putative_prey,
aes(axis1 = TAX_SHORT.A, axis2 = TAX_SHORT.B, y = count_sum)) +
scale_x_discrete(limits = c("TAX_SHORT.A", "TAX_SHORT.B"), expand = c(.2, .05)) +
# xlab("Demographic") +
geom_alluvium(aes(fill = Broad_Taxa.A), alpha = 0.5) +
scale_fill_manual(values = broad_color) +
geom_stratum(size = 0.5) +
geom_text(stat = "stratum", aes(label = after_stat(stratum)), size = 3) +
theme_minimal() +
theme(axis.text.x = element_blank(),
legend.title = element_blank()) +
labs(y = "Total Interactions", x = "", title = "18S-16S interactions")library(ggalluvial)
# head(tax_sum_interact)
putative_prey_dist <- tax_sum_interact %>%
filter(!(Broad_Taxa.A == "Unassigned-Eukaryote")) %>%
group_by(Broad_Taxa.A, TAX_SHORT.A, TAX_SHORT.B, EUK_DIST.A) %>%
summarise(count_sum = sum(COUNT)) %>%
data.frame## `summarise()` regrouping output by 'Broad_Taxa.A', 'TAX_SHORT.A', 'TAX_SHORT.B' (override with `.groups` argument)
broad_order <- c("Alveolata-Ciliates", "Alveolata", "Rhizaria", "Stramenopiles", "Hacrobia", "Amoebozoa", "Excavata", "Archaeplastida", "Opisthokonta", "Unassigned-Eukaryote")
putative_prey_dist$BROAD_TAXA_ORDER <- factor(putative_prey_dist$Broad_Taxa.A, levels = broad_order)
broad_color <- c("#ae017e","#bd0026","#ec7014","#238b45","#016c59","#08519c","#54278f","#810f7c", "black", "grey")
names(broad_color) <- broad_order# svg("18s-16s-alluvial-interaction-COSMO.svg", h = 7, w = 9)
putative_prey_dist %>%
filter(EUK_DIST.A == "Cosmopolitan") %>%
filter(!(BROAD_TAXA_ORDER == "Opisthokonta")) %>%
filter(!(BROAD_TAXA_ORDER == "Unassigned-Eukaryote")) %>%
ggplot(aes(axis1 = TAX_SHORT.A, axis2 = TAX_SHORT.B, y = count_sum)) +
scale_x_discrete(limits = c("TAX_SHORT.A", "TAX_SHORT.B"), expand = c(.2, .05)) +
# xlab("Demographic") +
geom_alluvium(aes(fill = BROAD_TAXA_ORDER), alpha = 0.5) +
scale_fill_manual(values = broad_color) +
geom_stratum(size = 0.5) +
geom_text(stat = "stratum", aes(label = after_stat(stratum)), size = 3) +
theme_minimal() +
theme(axis.text.x = element_blank(),
legend.title = element_blank()) +
labs(y = "Total Interactions", x = "", title = "18S-16S interactions")# svg("18s-16s-alluvial-interaction-RESIDENT.svg", h = 7, w = 9)
putative_prey_dist %>%
filter(EUK_DIST.A == "Resident") %>%
ggplot(aes(axis1 = TAX_SHORT.A, axis2 = TAX_SHORT.B, y = count_sum)) +
scale_x_discrete(limits = c("TAX_SHORT.A", "TAX_SHORT.B"), expand = c(.2, .05)) +
# xlab("Demographic") +
geom_alluvium(aes(fill = BROAD_TAXA_ORDER), alpha = 0.5) +
scale_fill_manual(values = broad_color) +
geom_stratum(size = 0.5) +
geom_text(stat = "stratum", aes(label = after_stat(stratum)), size = 3) +
theme_minimal() +
theme(axis.text.x = element_blank(),
legend.title = element_blank()) +
labs(y = "Total Interactions", x = "", title = "18S-16S interactions")# head(bac_data_interact) # Full taxonomy
# head(euk_data_interact)
euk_tmp <- euk_data_interact %>%
separate(FEATURE, c("domain", "Feature.ID"), sep = "_", remove = TRUE) %>%
select(Feature.ID, Loc.A = LocationName)
bac_tmp <- bac_data_interact %>%
separate(FEATURE, c("domain", "Feature.ID"), sep = "_", remove = TRUE) %>%
select(Feature.ID, Loc.B = LocationName)
p_p_wloc <- tax_sum_interact %>%
type.convert(as.is = TRUE) %>%
left_join(euk_tmp, by = c("ASV_18S" = "Feature.ID")) %>%
left_join(bac_tmp, by = c("ASV_16S" = "Feature.ID")) %>%
mutate(Loc_Interact = case_when(
as.character(Loc.A) == as.character(Loc.B) ~ "SAMELOCATION")) %>%
filter(Loc_Interact == "SAMELOCATION") %>%
filter(!(Broad_Taxa.A == "Unassigned-Eukaryote")) %>%
group_by(Broad_Taxa.A, TAX_SHORT.A, TAX_SHORT.B, Loc.A) %>%
summarise(count_sum = sum(COUNT)) %>%
data.frame## `summarise()` regrouping output by 'Broad_Taxa.A', 'TAX_SHORT.A', 'TAX_SHORT.B' (override with `.groups` argument)
## [1] 144 5
## [1] 600 5
p_p_wloc$BROAD_TAXA_ORDER <- factor(p_p_wloc$Broad_Taxa.A, levels = c("Alveolata-Ciliates", "Alveolata", "Rhizaria", "Stramenopiles", "Hacrobia", "Amoebozoa", "Excavata", "Archaeplastida", "Opisthokonta", "Unassigned-Eukaryote"))
broad_color <- c("#ae017e","#bd0026","#ec7014","#238b45","#016c59","#08519c","#54278f","#810f7c")# svg("Interactions-wlocation-Protistsinmiddle.svg", h = 10, w = 10)
# svg("Interactions-wlocation.svg", h = 10, w = 10)
# ggplot(p_p_wloc, aes(axis1 = TAX_SHORT.A, axis2 = TAX_SHORT.B, axis3 = Loc.A, y = count_sum)) +
# # scale_x_discrete(limits = c("Loc.A", "TAX_SHORT.A", "TAX_SHORT.B"), expand = c(.2, .05)) +
# scale_x_discrete(limits = c("TAX_SHORT.A", "TAX_SHORT.B", "Loc.A"), expand = c(.2, .05)) +
# geom_alluvium(aes(fill = Broad_Taxa.A), alpha = 0.7) +
# scale_fill_manual(values = broad_color) +
# geom_stratum(size = 0.5) +
# geom_text(stat = "stratum", aes(label = after_stat(stratum)), size = 2) +
# theme_minimal() +
# theme(axis.text.x = element_blank(),
# legend.title = element_blank()) +
# labs(y = "Total Interactions", x = "", title = "18S-16S interactions")
# dev.off()## R version 3.6.1 (2019-07-05)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: macOS Mojave 10.14.6
##
## Matrix products: default
## BLAS/LAPACK: /Users/sarahhu/anaconda3/envs/r_3.6.0/lib/R/lib/libRblas.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] ggalluvial_0.12.1 ggrepel_0.8.2 ape_5.3
## [4] RColorBrewer_1.1-2 cluster_2.1.0 compositions_1.40-5
## [7] bayesm_3.1-4 robustbase_0.93-6 tensorA_0.36.1
## [10] ade4_1.7-15 vegan_2.5-6 lattice_0.20-41
## [13] permute_0.9-5 plotly_4.9.2.1 decontam_1.6.0
## [16] phyloseq_1.30.0 patchwork_1.0.0.9000 cowplot_1.0.0
## [19] reshape2_1.4.4 forcats_0.5.0 stringr_1.4.0
## [22] dplyr_1.0.0 purrr_0.3.4 readr_1.3.1
## [25] tidyr_1.1.0 tibble_3.0.1 ggplot2_3.3.1
## [28] tidyverse_1.3.0
##
## loaded via a namespace (and not attached):
## [1] VGAM_1.1-3 colorspace_1.4-1 ellipsis_0.3.1
## [4] XVector_0.26.0 fs_1.4.1 rstudioapi_0.11
## [7] farver_2.0.3 fansi_0.4.1 lubridate_1.7.8
## [10] xml2_1.3.2 codetools_0.2-16 splines_3.6.1
## [13] knitr_1.28 SpiecEasi_1.1.0 jsonlite_1.6.1
## [16] broom_0.7.0 dbplyr_1.4.4 compiler_3.6.1
## [19] httr_1.4.1 backports_1.1.7 assertthat_0.2.1
## [22] Matrix_1.2-18 lazyeval_0.2.2 cli_2.0.2
## [25] htmltools_0.4.0 tools_3.6.1 igraph_1.2.5
## [28] gtable_0.3.0 glue_1.4.1 Rcpp_1.0.5
## [31] Biobase_2.46.0 cellranger_1.1.0 vctrs_0.3.0
## [34] Biostrings_2.54.0 multtest_2.42.0 nlme_3.1-148
## [37] iterators_1.0.12 crosstalk_1.1.0.1 xfun_0.14
## [40] rvest_0.3.5 lifecycle_0.2.0 DEoptimR_1.0-8
## [43] zlibbioc_1.32.0 MASS_7.3-51.6 scales_1.1.1
## [46] hms_0.5.3 parallel_3.6.1 biomformat_1.14.0
## [49] huge_1.3.4.1 rhdf5_2.30.1 yaml_2.2.1
## [52] stringi_1.4.6 S4Vectors_0.24.4 foreach_1.5.0
## [55] BiocGenerics_0.32.0 shape_1.4.4 rlang_0.4.6
## [58] pkgconfig_2.0.3 evaluate_0.14 Rhdf5lib_1.8.0
## [61] htmlwidgets_1.5.1 labeling_0.3 tidyselect_1.1.0
## [64] plyr_1.8.6 magrittr_1.5 R6_2.4.1
## [67] IRanges_2.20.2 generics_0.0.2 DBI_1.1.0
## [70] pillar_1.4.4 haven_2.3.1 withr_2.2.0
## [73] mgcv_1.8-31 survival_3.1-12 pulsar_0.3.7
## [76] modelr_0.1.8 crayon_1.3.4 utf8_1.1.4
## [79] rmarkdown_2.2 grid_3.6.1 readxl_1.3.1
## [82] data.table_1.12.8 blob_1.2.1 reprex_0.3.0
## [85] digest_0.6.25 glmnet_4.0-2 stats4_3.6.1
## [88] munsell_0.5.0 viridisLite_0.3.0